diff --git a/Cargo.toml b/Cargo.toml index 00c0ecf6..18f94cd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,3 +61,6 @@ path = "query-translator" [dependencies.mentat_tx_parser] path = "tx-parser" + +[profile.release] +debug = true diff --git a/db/src/bootstrap.rs b/db/src/bootstrap.rs index e8611615..22af6bb0 100644 --- a/db/src/bootstrap.rs +++ b/db/src/bootstrap.rs @@ -251,6 +251,6 @@ pub fn bootstrap_entities() -> Vec { // Failure here is a coding error (since the inputs are fixed), not a runtime error. // TODO: represent these bootstrap data errors rather than just panicing. - let bootstrap_entities: Vec = mentat_tx_parser::Tx::parse(bootstrap_assertions.with_spans()).unwrap(); + let bootstrap_entities: Vec = mentat_tx_parser::Tx::parse(&bootstrap_assertions.with_spans()).unwrap(); return bootstrap_entities; } diff --git a/db/src/db.rs b/db/src/db.rs index ca8ddd4e..9aa773b9 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -1154,7 +1154,7 @@ mod tests { fn transact(&mut self, transaction: I) -> Result where I: Borrow { // Failure to parse the transaction is a coding error, so we unwrap. let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str()); - let entities: Vec<_> = mentat_tx_parser::Tx::parse(assertions.clone()).expect(format!("to be able to parse {} into entities", assertions).as_str()); + let entities: Vec<_> = mentat_tx_parser::Tx::parse(&assertions).expect(format!("to be able to parse {} into entities", assertions).as_str()); let details = { // The block scopes the borrow of self.sqlite. diff --git a/edn/src/types.rs b/edn/src/types.rs index 241664ec..332713cc 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -110,9 +110,21 @@ impl ValueAndSpan { } } + pub fn as_atom(&self) -> Option<&ValueAndSpan> { + if self.inner.is_atom() { + Some(self) + } else { + None + } + } + pub fn into_text(self) -> Option { self.inner.into_text() } + + pub fn as_text(&self) -> Option<&String> { + self.inner.as_text() + } } impl Value { diff --git a/parser-utils/Cargo.toml b/parser-utils/Cargo.toml index c020fdbd..8eeebf0f 100644 --- a/parser-utils/Cargo.toml +++ b/parser-utils/Cargo.toml @@ -5,7 +5,8 @@ authors = ["Victor Porof ", "Richard Newman for more -/// illumination. -/// Nothing about this is specific to the result type of the parser. -pub type ResultParser = Expected ParseResult>>; - -/// `assert_parses_to!` simplifies some of the boilerplate around running a -/// parser function against input and expecting a certain result. -#[macro_export] -macro_rules! assert_parses_to { - ( $parser: expr, $input: expr, $expected: expr ) => {{ - let par = $parser(); - let result = par.skip(eof()).parse($input.with_spans().into_atom_stream()).map(|x| x.0); - assert_eq!(result, Ok($expected)); - }} -} - -/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function -/// against string input and expecting a certain result. -#[macro_export] -macro_rules! assert_edn_parses_to { - ( $parser: expr, $input: expr, $expected: expr ) => {{ - let par = $parser(); - let input = edn::parse::value($input).expect("to be able to parse input as EDN"); - let result = par.skip(eof()).parse(input.into_atom_stream()).map(|x| x.0); - assert_eq!(result, Ok($expected)); - }} -} - -/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map` -/// body that matches a particular `Value` enum case, otherwise returning `None`. -#[macro_export] -macro_rules! satisfy_unwrap { - ( $cas: path, $var: ident, $body: block ) => { - satisfy_map(|x: edn::Value| if let $cas($var) = x $body else { None }) - } -} - -/// Generate a `satisfy_map` expression that matches a `PlainSymbol` -/// value with the given name. -/// -/// We do this rather than using `combine::token` so that we don't -/// need to allocate a new `String` inside a `PlainSymbol` inside a `Value` -/// just to match input. -#[macro_export] -macro_rules! matches_plain_symbol { - ($name: expr, $input: ident) => { - satisfy_map(|x: edn::Value| { - if let edn::Value::PlainSymbol(ref s) = x { - if s.0.as_str() == $name { - return Some(()); - } - } - return None; - }).parse_stream($input) - } -} - -/// Define an `impl` body for the `$parser` type. The body will contain a parser -/// function called `$name`, consuming a stream of `$item_type`s. The parser's -/// result type will be `$result_type`. -/// -/// The provided `$body` will be evaluated with `$input` bound to the input stream. -/// -/// `$body`, when run, should return a `ParseResult` of the appropriate result type. -#[macro_export] -macro_rules! def_parser_fn { - ( $parser: ident, $name: ident, $item_type: ty, $result_type: ty, $input: ident, $body: block ) => { - impl $parser where I: Stream { - fn $name() -> ResultParser<$result_type, I> { - fn inner>($input: I) -> ParseResult<$result_type, I> { - $body - } - parser(inner as fn(I) -> ParseResult<$result_type, I>).expected(stringify!($name)) - } - } - } -} - -#[macro_export] -macro_rules! def_parser { - ( $parser: ident, $name: ident, $result_type: ty, $body: block ) => { - impl $parser { - fn $name() -> ResultParser<$result_type, $crate::value_and_span::Stream> { - fn inner(input: $crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream> { - $body.parse_lazy(input).into() - } - parser(inner as fn($crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream>).expected(stringify!($name)) - } - } - } -} - -/// `def_value_parser_fn` is a short-cut to `def_parser_fn` with the input type -/// being `edn::Value`. -#[macro_export] -macro_rules! def_value_parser_fn { - ( $parser: ident, $name: ident, $result_type: ty, $input: ident, $body: block ) => { - def_parser_fn!($parser, $name, edn::Value, $result_type, $input, $body); - } -} - -/// `def_value_satisfy_parser_fn` is a short-cut to `def_parser_fn` with the input type -/// being `edn::Value` and the body being a call to `satisfy_map` with the given transformer. -/// -/// In practice this allows you to simply pass a function that accepts an `&edn::Value` and -/// returns an `Option<$result_type>`: if a suitable value is at the front of the stream, -/// it will be converted and returned by the parser; otherwise, the parse will fail. -#[macro_export] -macro_rules! def_value_satisfy_parser_fn { - ( $parser: ident, $name: ident, $result_type: ty, $transformer: path ) => { - def_value_parser_fn!($parser, $name, $result_type, input, { - satisfy_map(|x: edn::Value| $transformer(&x)).parse_stream(input) - }); - } -} +extern crate itertools; /// A `ValueParseError` is a `combine::primitives::ParseError`-alike that implements the `Debug`, /// `Display`, and `std::error::Error` traits. In addition, it doesn't capture references, making @@ -152,11 +20,29 @@ macro_rules! def_value_satisfy_parser_fn { /// `Display`; rather than introducing a newtype like `DisplayVec`, we re-use `edn::Value::Vector`. #[derive(PartialEq)] pub struct ValueParseError { - pub position: usize, + pub position: edn::Span, // Think of this as `Vec>>`; see above. - pub errors: Vec>, + pub errors: Vec>, } +#[macro_use] +pub mod macros; + +pub use macros::{ + KeywordMapParser, + ResultParser, +}; + +pub mod log; +pub mod value_and_span; +pub use value_and_span::{ + Stream, +}; + +pub use log::{ + LogParsing, +}; + impl std::fmt::Debug for ValueParseError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, @@ -168,7 +54,7 @@ impl std::fmt::Debug for ValueParseError { impl std::fmt::Display for ValueParseError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - try!(writeln!(f, "Parse error at {}", self.position)); + try!(writeln!(f, "Parse error at {:?}", self.position)); combine::primitives::Error::fmt_errors(&self.errors, f) } } @@ -179,49 +65,13 @@ impl std::error::Error for ValueParseError { } } -impl<'a> From> for ValueParseError { - fn from(e: combine::primitives::ParseError<&'a [edn::Value]>) -> ValueParseError { +impl<'a> From>> for ValueParseError { + fn from(e: combine::primitives::ParseError>) -> ValueParseError { ValueParseError { - position: e.position, - errors: e.errors.into_iter().map(|e| e.map_range(|r| { - let mut v = Vec::new(); - v.extend_from_slice(r); - edn::Value::Vector(v) - })).collect(), - } - } -} - -/// Allow to map the range types of combine::primitives::{Info, Error}. -trait MapRange { - type Output; - fn map_range(self, f: F) -> Self::Output where F: FnOnce(R) -> S; -} - -impl MapRange for combine::primitives::Info { - type Output = combine::primitives::Info; - - fn map_range(self, f: F) -> combine::primitives::Info where F: FnOnce(R) -> S { - use combine::primitives::Info::*; - match self { - Token(t) => Token(t), - Range(r) => Range(f(r)), - Owned(s) => Owned(s), - Borrowed(x) => Borrowed(x), - } - } -} - -impl MapRange for combine::primitives::Error { - type Output = combine::primitives::Error; - - fn map_range(self, f: F) -> combine::primitives::Error where F: FnOnce(R) -> S { - use combine::primitives::Error::*; - match self { - Unexpected(x) => Unexpected(x.map_range(f)), - Expected(x) => Expected(x.map_range(f)), - Message(x) => Message(x.map_range(f)), - Other(x) => Other(x), + position: e.position.0, + errors: e.errors.into_iter() + .map(|e| e.map_token(|t| t.clone()).map_range(|r| r.clone())) + .collect(), } } } diff --git a/parser-utils/src/macros.rs b/parser-utils/src/macros.rs new file mode 100644 index 00000000..8dd9dbd2 --- /dev/null +++ b/parser-utils/src/macros.rs @@ -0,0 +1,137 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// use combine::{ +// ParseResult, +// }; +// use combine::combinator::{ +// Expected, +// FnParser, +// }; + +use combine::{ + ParseResult, +}; +// use combine::primitives; // To not shadow Error. +// use combine::primitives::{ +// Consumed, +// FastResult, +// }; +use combine::combinator::{ + Expected, + FnParser, +}; + +/// A type definition for a function parser that either parses an `O` from an input stream of type +/// `I`, or fails with an "expected" failure. +/// See for more +/// illumination. +/// Nothing about this is specific to the result type of the parser. +pub type ResultParser = Expected ParseResult>>; + +pub struct KeywordMapParser(pub T); + +/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map` +/// body that matches a particular `Value` enum case, otherwise returning `None`. +#[macro_export] +macro_rules! satisfy_unwrap { + ( $cas: path, $var: ident, $body: block ) => { + satisfy_map(|x: edn::Value| if let $cas($var) = x $body else { None }) + } +} + +/// Generate a `satisfy_map` expression that matches a `PlainSymbol` +/// value with the given name. +/// +/// We do this rather than using `combine::token` so that we don't +/// need to allocate a new `String` inside a `PlainSymbol` inside a `Value` +/// just to match input. +#[macro_export] +macro_rules! matches_plain_symbol { + ($name: expr, $input: ident) => { + satisfy_map(|x: edn::Value| { + if let edn::Value::PlainSymbol(ref s) = x { + if s.0.as_str() == $name { + return Some(()); + } + } + return None; + }).parse_stream($input) + } +} + +#[macro_export] +macro_rules! def_parser { + ( $parser: ident, $name: ident, $result_type: ty, $body: block ) => { + impl<'p> $parser<'p> { + fn $name<'a>() -> ResultParser<$result_type, $crate::value_and_span::Stream<'a>> { + fn inner<'a>(input: $crate::value_and_span::Stream<'a>) -> ParseResult<$result_type, $crate::value_and_span::Stream<'a>> { + $body.parse_lazy(input).into() + } + parser(inner as fn($crate::value_and_span::Stream<'a>) -> ParseResult<$result_type, $crate::value_and_span::Stream<'a>>).expected(stringify!($name)) + } + } + } +} + +/// `assert_parses_to!` simplifies some of the boilerplate around running a +/// parser function against input and expecting a certain result. +#[macro_export] +macro_rules! assert_parses_to { + ( $parser: expr, $input: expr, $expected: expr ) => {{ + let input = $input.with_spans(); + let par = $parser(); + let stream = input.atom_stream(); + let result = par.skip(eof()).parse(stream).map(|x| x.0); + assert_eq!(result, Ok($expected)); + }} +} + +/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function +/// against string input and expecting a certain result. +#[macro_export] +macro_rules! assert_edn_parses_to { + ( $parser: expr, $input: expr, $expected: expr ) => {{ + let input = edn::parse::value($input).expect("to be able to parse input as EDN"); + let par = $parser(); + let stream = input.atom_stream(); + let result = par.skip(eof()).parse(stream).map(|x| x.0); + assert_eq!(result, Ok($expected)); + }} +} + +/// `assert_parse_failure_contains!` simplifies running a parser function against string input and +/// expecting a certain failure. This is working around the complexity of pattern matching parse +/// errors that contain spans. +#[macro_export] +macro_rules! assert_parse_failure_contains { + ( $parser: expr, $input: expr, $expected: expr ) => {{ + let input = edn::parse::value($input).expect("to be able to parse input as EDN"); + let par = $parser(); + let stream = input.atom_stream(); + let result = par.skip(eof()).parse(stream).map(|x| x.0).map_err(|e| -> ::ValueParseError { e.into() }); + assert!(format!("{:?}", result).contains($expected), "Expected {:?} to contain {:?}", result, $expected); + }} +} + +#[macro_export] +macro_rules! keyword_map_of { + ($(($keyword:expr, $value:expr)),+) => {{ + let mut seen = std::collections::BTreeSet::default(); + + $( + if !seen.insert($keyword) { + panic!("keyword map has repeated key: {}", stringify!($keyword)); + } + )+ + + KeywordMapParser(($(($keyword, $value)),+)) + }} +} diff --git a/parser-utils/src/value_and_span.rs b/parser-utils/src/value_and_span.rs index 13d9e7e5..20b9de3c 100644 --- a/parser-utils/src/value_and_span.rs +++ b/parser-utils/src/value_and_span.rs @@ -8,13 +8,15 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +#![allow(dead_code)] + use std; +use std::cmp::Ordering; use std::fmt::{ Debug, Display, Formatter, }; -use std::cmp::Ordering; use combine::{ ConsumedResult, @@ -22,15 +24,11 @@ use combine::{ Parser, ParseResult, StreamOnce, - many, - many1, parser, - satisfy, satisfy_map, }; use combine::primitives; // To not shadow Error. use combine::primitives::{ - Consumed, FastResult, }; use combine::combinator::{ @@ -40,9 +38,13 @@ use combine::combinator::{ use edn; +use macros::{ + KeywordMapParser, +}; + /// A wrapper to let us order `edn::Span` in whatever way is appropriate for parsing with `combine`. #[derive(Clone, Copy, Debug)] -pub struct SpanPosition(edn::Span); +pub struct SpanPosition(pub edn::Span); impl Display for SpanPosition { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { @@ -76,29 +78,37 @@ impl Ord for SpanPosition { /// yielding `ValueAndSpan` items, which allows us to yield uniform `combine::ParseError` types from /// disparate parsers. #[derive(Clone)] -pub enum IntoIter { - Empty(std::iter::Empty), - Atom(std::iter::Once), - Vector(std::vec::IntoIter), - List(std::collections::linked_list::IntoIter), - /// Iterates via a single `flat_map` [k1, v1, k2, v2, ...]. - Map(std::vec::IntoIter), +pub enum Iter<'a> { + Empty, + Atom(std::iter::Once<&'a edn::ValueAndSpan>), + Vector(std::slice::Iter<'a, edn::ValueAndSpan>), + List(std::collections::linked_list::Iter<'a, edn::ValueAndSpan>), + /// Iterates a map {:k1 v1, :k2 v2, ...} as a single `flat_map` slice [k1, v1, k2, v2, ...]. + Map(std::iter::FlatMap, + std::iter::Chain, std::iter::Once<&'a edn::ValueAndSpan>>, + fn((&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain, std::iter::Once<&'a edn::ValueAndSpan>>>), + /// Iterates a map with vector values {:k1 [v11 v12 ...], :k2 [v21 v22 ...], ...} as a single + /// flattened map [k1, v11, v12, ..., k2, v21, v22, ...]. + KeywordMap(std::iter::FlatMap, + std::iter::Chain, Box>>, + fn((&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain, Box>>>), // TODO: Support Set and Map more naturally. This is significantly more work because the // existing BTreeSet and BTreeMap iterators do not implement Clone, and implementing Clone for // them is involved. Since we don't really need to parse sets and maps at this time, this will // do for now. } -impl Iterator for IntoIter { - type Item = edn::ValueAndSpan; +impl<'a> Iterator for Iter<'a> { + type Item = &'a edn::ValueAndSpan; fn next(&mut self) -> Option { match *self { - IntoIter::Empty(ref mut i) => i.next(), - IntoIter::Atom(ref mut i) => i.next(), - IntoIter::Vector(ref mut i) => i.next(), - IntoIter::List(ref mut i) => i.next(), - IntoIter::Map(ref mut i) => i.next(), + Iter::Empty => None, + Iter::Atom(ref mut i) => i.next(), + Iter::Vector(ref mut i) => i.next(), + Iter::List(ref mut i) => i.next(), + Iter::Map(ref mut i) => i.next(), + Iter::KeywordMap(ref mut i) => i.next(), } } } @@ -107,11 +117,11 @@ impl Iterator for IntoIter { /// to `combine::IteratorStream` as produced by `combine::from_iter`, but specialized to /// `edn::ValueAndSpan`. #[derive(Clone)] -pub struct Stream(IntoIter, SpanPosition); +pub struct Stream<'a>(Iter<'a>, SpanPosition); /// Things specific to parsing with `combine` and our `Stream` that need a trait to live outside of /// the `edn` crate. -pub trait Item: Clone + PartialEq + Sized { +pub trait Item<'a>: Clone + PartialEq + Sized { /// Position could be specialized to `SpanPosition`. type Position: Clone + Ord + std::fmt::Display; @@ -120,13 +130,16 @@ pub trait Item: Clone + PartialEq + Sized { fn start(&self) -> Self::Position; fn update_position(&self, &mut Self::Position); - fn into_child_stream_iter(self) -> IntoIter; - fn into_child_stream(self) -> Stream; - fn into_atom_stream_iter(self) -> IntoIter; - fn into_atom_stream(self) -> Stream; + fn child_iter(&'a self) -> Iter<'a>; + fn child_stream(&'a self) -> Stream<'a>; + fn atom_iter(&'a self) -> Iter<'a>; + fn atom_stream(&'a self) -> Stream<'a>; + + fn keyword_map_iter(&'a self) -> Iter<'a>; + fn keyword_map_stream(&'a self) -> Stream<'a>; } -impl Item for edn::ValueAndSpan { +impl<'a> Item<'a> for edn::ValueAndSpan { type Position = SpanPosition; fn start(&self) -> Self::Position { @@ -137,28 +150,48 @@ impl Item for edn::ValueAndSpan { *position = SpanPosition(self.span.clone()) } - fn into_child_stream_iter(self) -> IntoIter { - match self.inner { - edn::SpannedValue::Vector(values) => IntoIter::Vector(values.into_iter()), - edn::SpannedValue::List(values) => IntoIter::List(values.into_iter()), - // Parsing pairs with `combine` is tricky; parsing sequences is easy. - edn::SpannedValue::Map(map) => IntoIter::Map(map.into_iter().flat_map(|(a, v)| std::iter::once(a).chain(std::iter::once(v))).collect::>().into_iter()), - _ => IntoIter::Empty(std::iter::empty()), + fn keyword_map_iter(&'a self) -> Iter<'a> { + fn flatten_k_vector<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain, Box>> { + std::iter::once(k).chain(Box::new(v.child_iter())) + } + + match self.inner.as_map() { + Some(ref map) => Iter::KeywordMap(map.iter().flat_map(flatten_k_vector)), + None => Iter::Empty } } - fn into_child_stream(self) -> Stream { + fn keyword_map_stream(&'a self) -> Stream<'a> { let span = self.span.clone(); - Stream(self.into_child_stream_iter(), SpanPosition(span)) + Stream(self.keyword_map_iter(), SpanPosition(span)) } - fn into_atom_stream_iter(self) -> IntoIter { - IntoIter::Atom(std::iter::once(self)) + fn child_iter(&'a self) -> Iter<'a> { + fn flatten_k_v<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain, std::iter::Once<&'a edn::ValueAndSpan>> { + std::iter::once(k).chain(std::iter::once(v)) + } + + match self.inner { + edn::SpannedValue::Vector(ref values) => Iter::Vector(values.iter()), + edn::SpannedValue::List(ref values) => Iter::List(values.iter()), + // Parsing pairs with `combine` is tricky; parsing sequences is easy. + edn::SpannedValue::Map(ref map) => Iter::Map(map.iter().flat_map(flatten_k_v)), + _ => Iter::Empty, + } } - fn into_atom_stream(self) -> Stream { + fn child_stream(&'a self) -> Stream<'a> { let span = self.span.clone(); - Stream(self.into_atom_stream_iter(), SpanPosition(span)) + Stream(self.child_iter(), SpanPosition(span)) + } + + fn atom_iter(&'a self) -> Iter<'a> { + Iter::Atom(std::iter::once(self)) + } + + fn atom_stream(&'a self) -> Stream<'a> { + let span = self.span.clone(); + Stream(self.atom_iter(), SpanPosition(span)) } } @@ -174,9 +207,26 @@ impl Item for edn::ValueAndSpan { #[derive(Clone)] pub struct OfExactly(P, N); -impl Parser for OfExactly - where P: Parser, - N: Parser, +pub trait Streaming<'a> { + fn as_stream(self) -> Stream<'a>; +} + +impl<'a> Streaming<'a> for &'a edn::ValueAndSpan { + fn as_stream(self) -> Stream<'a> { + self.child_stream() + } +} + +impl<'a> Streaming<'a> for Stream<'a> { + fn as_stream(self) -> Stream<'a> { + self + } +} + +impl<'a, P, N, M, O> Parser for OfExactly + where P: Parser, Output=M>, + N: Parser, Output=O>, + M: 'a + Streaming<'a>, { type Input = P::Input; type Output = O; @@ -186,7 +236,7 @@ impl Parser for OfExactly match self.0.parse_lazy(input) { ConsumedOk((outer_value, outer_input)) => { - match self.1.parse_lazy(outer_value.into_child_stream()) { + match self.1.parse_lazy(outer_value.as_stream()) { ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => { match inner_input.uncons() { Err(ref err) if *err == primitives::Error::end_of_input() => ConsumedOk((inner_value, outer_input)), @@ -200,7 +250,7 @@ impl Parser for OfExactly } }, EmptyOk((outer_value, outer_input)) => { - match self.1.parse_lazy(outer_value.into_child_stream()) { + match self.1.parse_lazy(outer_value.as_stream()) { ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => { match inner_input.uncons() { Err(ref err) if *err == primitives::Error::end_of_input() => EmptyOk((inner_value, outer_input)), @@ -222,9 +272,10 @@ impl Parser for OfExactly } #[inline(always)] -pub fn of_exactly(p: P, n: N) -> OfExactly - where P: Parser, - N: Parser, +pub fn of_exactly<'a, P, N, M, O>(p: P, n: N) -> OfExactly + where P: Parser, Output=M>, + N: Parser, Output=O>, + M: 'a + Streaming<'a>, { OfExactly(p, n) } @@ -236,8 +287,9 @@ pub trait OfExactlyParsing: Parser + Sized { N: Parser; } -impl

OfExactlyParsing for P - where P: Parser +impl<'a, P, M> OfExactlyParsing for P + where P: Parser, Output=M>, + M: 'a + Streaming<'a>, { fn of_exactly(self, n: N) -> OfExactly where N: Parser @@ -247,10 +299,10 @@ impl

OfExactlyParsing for P } /// Equivalent to `combine::IteratorStream`. -impl StreamOnce for Stream +impl<'a> StreamOnce for Stream<'a> { - type Item = edn::ValueAndSpan; - type Range = edn::ValueAndSpan; + type Item = &'a edn::ValueAndSpan; + type Range = &'a edn::ValueAndSpan; type Position = SpanPosition; #[inline] @@ -272,84 +324,132 @@ impl StreamOnce for Stream /// Shorthands, just enough to convert the `mentat_db` crate for now. Written using `Box` for now: /// it's simple and we can address allocation issues if and when they surface. -pub fn vector() -> Box> { - satisfy(|v: edn::ValueAndSpan| v.inner.is_vector()).boxed() -} - -pub fn list() -> Box> { - satisfy(|v: edn::ValueAndSpan| v.inner.is_list()).boxed() -} - -pub fn map() -> Box> { - satisfy(|v: edn::ValueAndSpan| v.inner.is_map()).boxed() -} - -pub fn seq() -> Box> { - satisfy(|v: edn::ValueAndSpan| v.inner.is_list() || v.inner.is_vector()).boxed() -} - -pub fn integer() -> Box> { - satisfy_map(|v: edn::ValueAndSpan| v.inner.as_integer()).boxed() -} - -pub fn namespaced_keyword() -> Box> { - satisfy_map(|v: edn::ValueAndSpan| v.inner.as_namespaced_keyword().cloned()).boxed() -} - -/// Like `combine::token()`, but compare an `edn::Value` to an `edn::ValueAndSpan`. -pub fn value(value: edn::Value) -> Box> { - // TODO: make this comparison faster. Right now, we drop all the spans; if we walked the value - // trees together, we could avoid creating garbage. - satisfy(move |v: edn::ValueAndSpan| value == v.inner.into()).boxed() -} - -fn keyword_map_(input: Stream) -> ParseResult -{ - // One run is a keyword followed by one or more non-keywords. - let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()), - many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword())) - .map(|vs: Vec| { - // TODO: extract "spanning". - let beg = vs.first().unwrap().span.0; - let end = vs.last().unwrap().span.1; - edn::ValueAndSpan { - inner: edn::SpannedValue::Vector(vs), - span: edn::Span(beg, end), - } - })); - - let mut runs = vector().of_exactly(many::, _>(run)); - - let (data, input) = try!(runs.parse_lazy(input).into()); - - let mut m: std::collections::BTreeMap = std::collections::BTreeMap::default(); - for (k, vs) in data { - if m.insert(k, vs).is_some() { - // TODO: improve this message. - return Err(Consumed::Empty(ParseError::from_errors(input.into_inner().position(), Vec::new()))) +pub fn vector_<'a>(input: Stream<'a>) -> ParseResult, Stream<'a>> { + satisfy_map(|v: &'a edn::ValueAndSpan| { + if v.inner.is_vector() { + Some(v.child_stream()) + } else { + None } - } - - let map = edn::ValueAndSpan { - inner: edn::SpannedValue::Map(m), - span: edn::Span(0, 0), // TODO: fix this. - }; - - Ok((map, input)) + }) + .parse_lazy(input) + .into() } -/// Turn a vector of keywords and non-keyword values into a map. As an example, turn -/// ```edn -/// [:keyword1 value1 value2 ... :keyword2 value3 value4 ...] -/// ``` -/// into -/// ```edn -/// {:keyword1 [value1 value2 ...] :keyword2 [value3 value4 ...]} -/// ```. -pub fn keyword_map() -> Expected ParseResult>> -{ - // The `as` work arounds https://github.com/rust-lang/rust/issues/20178. - parser(keyword_map_ as fn(Stream) -> ParseResult).expected("keyword map") +pub fn vector<'a>() -> Expected, fn(Stream<'a>) -> ParseResult, Stream<'a>>>> { + parser(vector_ as fn(Stream<'a>) -> ParseResult, Stream<'a>>).expected("vector") +} + +pub fn list_<'a>(input: Stream<'a>) -> ParseResult, Stream<'a>> { + satisfy_map(|v: &'a edn::ValueAndSpan| { + if v.inner.is_list() { + Some(v.child_stream()) + } else { + None + } + }) + .parse_lazy(input) + .into() +} + +pub fn list<'a>() -> Expected, fn(Stream<'a>) -> ParseResult, Stream<'a>>>> { + parser(list_ as fn(Stream<'a>) -> ParseResult, Stream<'a>>).expected("list") +} + +pub fn seq_<'a>(input: Stream<'a>) -> ParseResult, Stream<'a>> { + satisfy_map(|v: &'a edn::ValueAndSpan| { + if v.inner.is_list() || v.inner.is_vector() { + Some(v.child_stream()) + } else { + None + } + }) + .parse_lazy(input) + .into() +} + +pub fn seq<'a>() -> Expected, fn(Stream<'a>) -> ParseResult, Stream<'a>>>> { + parser(seq_ as fn(Stream<'a>) -> ParseResult, Stream<'a>>).expected("vector|list") +} + +pub fn map_<'a>(input: Stream<'a>) -> ParseResult, Stream<'a>> { + satisfy_map(|v: &'a edn::ValueAndSpan| { + if v.inner.is_map() { + Some(v.child_stream()) + } else { + None + } + }) + .parse_lazy(input) + .into() +} + +pub fn map<'a>() -> Expected, fn(Stream<'a>) -> ParseResult, Stream<'a>>>> { + parser(map_ as fn(Stream<'a>) -> ParseResult, Stream<'a>>).expected("map") +} + +/// A `[k v]` pair in the map form of a keyword map must have the shape `[:k, [v1, v2, ...]]`, with +/// none of `v1`, `v2`, ... a keyword: without loss of generality, we cannot represent the case +/// where `vn` is a keyword `:l`, since `[:k v1 v2 ... :l]`, isn't a valid keyword map in vector +/// form. This function tests that a `[k v]` pair obeys these constraints. +/// +/// If we didn't test this, then we might flatten a map `[:k [:l]] to `[:k :l]`, which isn't a valid +/// keyword map in vector form. +pub fn is_valid_keyword_map_k_v<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> bool { + if !k.inner.is_keyword() { + return false; + } + match v.inner.as_vector() { + None => { + return false; + }, + Some(ref vs) => { + if !vs.iter().all(|vv| !vv.inner.is_keyword()) { + return false; + } + }, + } + return true; +} + +pub fn keyword_map_<'a>(input: Stream<'a>) -> ParseResult, Stream<'a>> { + satisfy_map(|v: &'a edn::ValueAndSpan| { + v.inner.as_map().and_then(|map| { + if map.iter().all(is_valid_keyword_map_k_v) { + println!("yes {:?}", map); + Some(v.keyword_map_stream()) + } else { + println!("no {:?}", map); + None + } + }) + }) + .parse_lazy(input) + .into() +} + +pub fn keyword_map<'a>() -> Expected, fn(Stream<'a>) -> ParseResult, Stream<'a>>>> { + parser(keyword_map_ as fn(Stream<'a>) -> ParseResult, Stream<'a>>).expected("keyword map") +} + +pub fn integer_<'a>(input: Stream<'a>) -> ParseResult> { + satisfy_map(|v: &'a edn::ValueAndSpan| v.inner.as_integer()) + .parse_lazy(input) + .into() +} + +pub fn integer<'a>() -> Expected, fn(Stream<'a>) -> ParseResult>>> { + parser(integer_ as fn(Stream<'a>) -> ParseResult>).expected("integer") +} + +pub fn namespaced_keyword_<'a>(input: Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>> { + satisfy_map(|v: &'a edn::ValueAndSpan| v.inner.as_namespaced_keyword()) + .parse_lazy(input) + .into() +} + +pub fn namespaced_keyword<'a>() -> Expected, fn(Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>>>> { + parser(namespaced_keyword_ as fn(Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>>).expected("namespaced_keyword") } /// Generate a `satisfy` expression that matches a `PlainSymbol` value with the given name. @@ -359,8 +459,8 @@ pub fn keyword_map() -> Expected ParseResult { - def_parser!($parser, $name, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { + def_parser!($parser, $name, &'a edn::ValueAndSpan, { + satisfy(|v: &'a edn::ValueAndSpan| { match v.inner { edn::SpannedValue::PlainSymbol(ref s) => s.0.as_str() == $input, _ => false, @@ -376,8 +476,8 @@ macro_rules! def_matches_plain_symbol { #[macro_export] macro_rules! def_matches_keyword { ( $parser: ident, $name: ident, $input: expr ) => { - def_parser!($parser, $name, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { + def_parser!($parser, $name, &'a edn::ValueAndSpan, { + satisfy(|v: &'a edn::ValueAndSpan| { match v.inner { edn::SpannedValue::Keyword(ref s) => s.0.as_str() == $input, _ => false, @@ -394,8 +494,8 @@ macro_rules! def_matches_keyword { #[macro_export] macro_rules! def_matches_namespaced_keyword { ( $parser: ident, $name: ident, $input_namespace: expr, $input_name: expr ) => { - def_parser!($parser, $name, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { + def_parser!($parser, $name, &'a edn::ValueAndSpan, { + satisfy(|v: &'a edn::ValueAndSpan| { match v.inner { edn::SpannedValue::NamespacedKeyword(ref s) => s.namespace.as_str() == $input_namespace && s.name.as_str() == $input_name, _ => false, @@ -405,57 +505,248 @@ macro_rules! def_matches_namespaced_keyword { } } +use combine::primitives::{ + Error, + Info, +}; +use combine::primitives::FastResult::*; + +/// Compare to `tuple_parser!` in `combine`. +/// +/// This uses edge cases in Rust's hygienic macro system to represent arbitrary values. That is, +/// `$value: ident` represents both a type in the tuple parameterizing `KeywordMapParser` (since +/// `(A, B, C)` is a valid type declaration) and also a variable value extracted from the underlying +/// instance value. `$tmp: ident` represents an optional value to return. +/// +/// This unrolls the cases. Each loop iteration reads a token. It then unrolls the known cases, +/// checking if any case matches the keyword string. If yes, we parse further. If no, we move on +/// to the next case. If no case matches, we fail. +macro_rules! keyword_map_parser { + ($(($keyword:ident, $value:ident, $tmp:ident)),+) => { + impl <'a, $($value:),+> Parser for KeywordMapParser<($((&'static str, $value)),+)> + where $($value: Parser>),+ + { + type Input = Stream<'a>; + type Output = ($(Option<$value::Output>),+); + + #[allow(non_snake_case)] + fn parse_lazy(&mut self, + mut input: Stream<'a>) + -> ConsumedResult<($(Option<$value::Output>),+), Stream<'a>> { + let ($((ref $keyword, ref mut $value)),+) = (*self).0; + let mut consumed = false; + + $( + let mut $tmp = None; + )+ + + loop { + match input.uncons() { + Ok(value) => { + $( + if let Some(ref keyword) = value.inner.as_keyword() { + if keyword.0.as_str() == *$keyword { + if $tmp.is_some() { + // Repeated match -- bail out! Providing good error + // messages is hard; this will do for now. + return ConsumedErr(ParseError::new(input.position(), Error::Unexpected(Info::Token(value)))); + } + + consumed = true; + + $tmp = match $value.parse_lazy(input.clone()) { + ConsumedOk((x, new_input)) => { + input = new_input; + Some(x) + } + EmptyErr(mut err) => { + if let Ok(t) = input.uncons() { + err.add_error(Error::Unexpected(Info::Token(t))); + } + if consumed { + return ConsumedErr(err) + } else { + return EmptyErr(err) + } + } + ConsumedErr(err) => return ConsumedErr(err), + EmptyOk((x, new_input)) => { + input = new_input; + Some(x) + } + }; + + continue + } + } + )+ + + // No keyword matched! Bail out. + return ConsumedErr(ParseError::new(input.position(), Error::Unexpected(Info::Token(value)))); + }, + Err(err) => { + if consumed { + return ConsumedOk((($($tmp),+), input)) + } else { + if err == Error::end_of_input() { + return EmptyOk((($($tmp),+), input)); + } + return EmptyErr(ParseError::new(input.position(), err)) + } + }, + } + } + } + } + } +} + +keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt)); +keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct)); +keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt)); +keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et)); +keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et), (Fk, Fv, Ft)); +keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et), (Fk, Fv, Ft), (Gk, Gv, Gt)); + #[cfg(test)] mod tests { - use combine::{eof}; + use combine::{ + eof, + many, + satisfy, + }; + use super::*; - /// Take a string `input` and a string `expected` and ensure that `input` parses to an - /// `edn::Value` keyword map equivalent to the `edn::Value` that `expected` parses to. - macro_rules! assert_keyword_map_eq { - ( $input: expr, $expected: expr ) => {{ - let input = edn::parse::value($input).expect("to be able to parse input EDN"); - let expected = $expected.map(|e| { - edn::parse::value(e).expect("to be able to parse expected EDN").without_spans() - }); - let mut par = keyword_map().map(|x| x.without_spans()).skip(eof()); - let result = par.parse(input.into_atom_stream()).map(|x| x.0); - assert_eq!(result.ok(), expected); - }} + use macros::{ + ResultParser, + }; + + /// A little test parser. + pub struct Test<'a>(std::marker::PhantomData<&'a ()>); + + def_matches_namespaced_keyword!(Test, add, "db", "add"); + + def_parser!(Test, entid, i64, { + integer() + .map(|x| x) + .or(namespaced_keyword().map(|_| -1)) + }); + + #[test] + #[should_panic(expected = r#"keyword map has repeated key: "x""#)] + fn test_keyword_map_of() { + keyword_map_of!(("x", Test::entid()), + ("x", Test::entid())); + } + + #[test] + fn test_iter() { + // A vector and a map iterated as a keyword map produce the same elements. + let input = edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN"); + assert_eq!(input.child_iter().cloned().map(|x| x.without_spans()).into_iter().collect::>(), + edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector")); + + let input = edn::parse::value("{:x [1 2] :y [3 4]}").expect("to be able to parse input as EDN"); + assert_eq!(input.keyword_map_iter().cloned().map(|x| x.without_spans()).into_iter().collect::>(), + edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector")); + + // Parsing a keyword map in map and vector form produces the same elements. The order (:y + // before :x) is a foible of our EDN implementation and could be easily changed. + assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x| x.0.map(|x| x.clone().without_spans()).into_iter().collect::>()), + "{:x [1] :y [2]}", + edn::parse::value("[:y 2 :x 1]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector")); + + assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x| x.0.map(|x| x.clone().without_spans()).into_iter().collect::>()), + "[:y 2 :x 1]", + edn::parse::value("[:y 2 :x 1]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector")); } #[test] fn test_keyword_map() { - assert_keyword_map_eq!( - "[:foo 1 2 3 :bar 4]", - Some("{:foo [1 2 3] :bar [4]}")); + assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))), + "[:y 2 :x 1]", + (Some(1), Some(2))); - // Trailing keywords aren't allowed. - assert_keyword_map_eq!( - "[:foo]", - None); - assert_keyword_map_eq!( - "[:foo 2 :bar]", - None); + assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))), + "[:x 1 :y 2]", + (Some(1), Some(2))); - // Duplicate keywords aren't allowed. - assert_keyword_map_eq!( - "[:foo 2 :foo 1]", - None); + assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))), + "[:x 1]", + (Some(1), None)); - // Starting with anything but a keyword isn't allowed. - assert_keyword_map_eq!( - "[2 :foo 1]", - None); + assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", vector().of_exactly(many::, _>(Test::entid()))), + ("y", vector().of_exactly(many::, _>(Test::entid()))))), + "[:x [] :y [1 2]]", + (Some(vec![]), Some(vec![1, 2]))); - // Consecutive keywords aren't allowed. - assert_keyword_map_eq!( - "[:foo :bar 1]", - None); - - // Empty lists return an empty map. - assert_keyword_map_eq!( - "[]", - Some("{}")); + assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", vector().of_exactly(many::, _>(Test::entid()))), + ("y", vector().of_exactly(many::, _>(Test::entid()))))), + "[]", + (None, None)); } + + #[test] + fn test_keyword_map_failures() { + assert_parse_failure_contains!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))), + "[:x 1 :x 2]", + r#"errors: [Unexpected(Token(ValueAndSpan { inner: Keyword(Keyword("x"))"#); + } + + + // assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x| x.0.map(|x| x.clone().without_spans()).into_iter().collect::>()), "{:x [1] :y [2]}", vec![]); + + // assert_edn_parses_to!(|| keyword_map().or(vector()).of_exactly((Test::entid(), Test::entid())), "{:x [1] :y [2]}", (-1, 1)); + + // assert_edn_parses_to!(|| kw_map().of_exactly((Test::entid(), Test::entid())), "[:a 0 :b 0 1]", (1, 1)); + + // assert_edn_parses_to!(|| keyword_map_of(&[(":kw1", Test::entid()), + // (":kw2", (Test::entid(), Test::entid())),]), + // "{:kw1 0 :kw2 1 :x/y}", ((Some(0), Some((0, 1))))); + + + + + // let input = edn::parse::value("[:x/y]").expect("to be able to parse input as EDN"); + // let par = vector().of_exactly(Test::entid()); + // let stream: Stream = (&input).atom_stream(); + // let result = par.skip(eof()).parse(stream).map(|x| x.0); + // assert_eq!(result, Ok(1)); + // } + + // #[test] + // fn test_keyword_map() { + // assert_keyword_map_eq!( + // "[:foo 1 2 3 :bar 4]", + // Some("{:foo [1 2 3] :bar [4]}")); + + // // Trailing keywords aren't allowed. + // assert_keyword_map_eq!( + // "[:foo]", + // None); + // assert_keyword_map_eq!( + // "[:foo 2 :bar]", + // None); + + // // Duplicate keywords aren't allowed. + // assert_keyword_map_eq!( + // "[:foo 2 :foo 1]", + // None); + + // // Starting with anything but a keyword isn't allowed. + // assert_keyword_map_eq!( + // "[2 :foo 1]", + // None); + + // // Consecutive keywords aren't allowed. + // assert_keyword_map_eq!( + // "[:foo :bar 1]", + // None); + + // // Empty lists return an empty map. + // assert_keyword_map_eq!( + // "[]", + // Some("{}")); + // } } diff --git a/query-parser/Cargo.toml b/query-parser/Cargo.toml index 69488bbd..96f0599b 100644 --- a/query-parser/Cargo.toml +++ b/query-parser/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] -combine = "2.2.2" +combine = "2.3.2" error-chain = "0.8.1" matches = "0.1" diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index b9c36077..ab2d857c 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -21,6 +21,7 @@ use self::combine::{eof, many, many1, optional, parser, satisfy, satisfy_map, Pa use self::combine::combinator::{any, choice, or, try}; use self::mentat_parser_utils::{ + KeywordMapParser, ResultParser, ValueParseError, }; @@ -79,12 +80,12 @@ error_chain! { display("not a variable: '{}'", value) } - FindParseError(e: combine::ParseError) { + FindParseError(e: ValueParseError) { description(":find parse error") display(":find parse error") } - WhereParseError(e: combine::ParseError) { + WhereParseError(e: ValueParseError) { description(":where parse error") display(":where parse error") } @@ -117,7 +118,7 @@ error_chain! { } } -pub struct Query; +pub struct Query<'a>(std::marker::PhantomData<&'a ()>); def_parser!(Query, variable, Variable, { satisfy_map(Variable::from_value) @@ -141,7 +142,7 @@ def_parser!(Query, arguments, Vec, { }); def_parser!(Query, direction, Direction, { - satisfy_map(|v: edn::ValueAndSpan| { + satisfy_map(|v: &edn::ValueAndSpan| { match v.inner { edn::SpannedValue::PlainSymbol(ref s) => { let name = s.0.as_str(); @@ -162,20 +163,20 @@ def_parser!(Query, order, Order, { .or(Query::variable().map(|v| Order(Direction::Ascending, v))) }); -pub struct Where; +pub struct Where<'a>(std::marker::PhantomData<&'a ()>); def_parser!(Where, pattern_value_place, PatternValuePlace, { satisfy_map(PatternValuePlace::from_value) }); def_parser!(Query, natural_number, u64, { - any().and_then(|v: edn::ValueAndSpan| { + any().and_then(|v: &edn::ValueAndSpan| { match v.inner { edn::SpannedValue::Integer(x) if (x > 0) => { Ok(x as u64) }, - spanned => { - let e = Box::new(Error::from_kind(ErrorKind::InvalidLimit(spanned.into()))); + ref spanned => { + let e = Box::new(Error::from_kind(ErrorKind::InvalidLimit(spanned.clone().into()))); Err(combine::primitives::Error::Other(e)) }, } @@ -338,7 +339,7 @@ def_parser!(Where, clauses, Vec, { (many1::, _>(Where::clause())) }); -pub struct Find; +pub struct Find<'a>(std::marker::PhantomData<&'a ()>); def_matches_plain_symbol!(Find, period, "."); @@ -347,7 +348,6 @@ def_matches_plain_symbol!(Find, ellipsis, "..."); def_parser!(Find, find_scalar, FindSpec, { Query::variable() .skip(Find::period()) - .skip(eof()) .map(|var| FindSpec::FindScalar(Element::Variable(var))) }); @@ -392,91 +392,37 @@ def_parser!(Find, spec, FindSpec, { &mut try(Find::find_rel())]) }); -def_matches_keyword!(Find, literal_find, "find"); -def_matches_keyword!(Find, literal_in, "in"); -def_matches_keyword!(Find, literal_limit, "limit"); -def_matches_keyword!(Find, literal_order, "order"); -def_matches_keyword!(Find, literal_where, "where"); -def_matches_keyword!(Find, literal_with, "with"); - -/// Express something close to a builder pattern for a `FindQuery`. -enum FindQueryPart { - FindSpec(FindSpec), - In(BTreeSet), - Limit(Limit), - Order(Vec), - WhereClauses(Vec), - With(BTreeSet), -} - def_parser!(Find, vars, BTreeSet, { - vector().of_exactly(many(Query::variable()).and_then(|vars: Vec| { - let given = vars.len(); - let set: BTreeSet = vars.into_iter().collect(); - if given != set.len() { - // TODO: find out what the variable is! - let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError)); - Err(combine::primitives::Error::Other(e)) - } else { - Ok(set) - } - })) + many(Query::variable()).and_then(|vars: Vec| { + let given = vars.len(); + let set: BTreeSet = vars.into_iter().collect(); + if given != set.len() { + // TODO: find out what the variable is! + let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError)); + Err(combine::primitives::Error::Other(e)) + } else { + Ok(set) + } + }) }); /// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find /// queries, then we use `FindQueryPart` to collect parts that have heterogeneous types; and then we /// construct a `FindQuery` from them. def_parser!(Find, query, FindQuery, { - let p_find_spec = Find::literal_find() - .with(vector().of_exactly(Find::spec().map(FindQueryPart::FindSpec))); + let find_map = keyword_map_of!( + ("find", Find::spec()), + ("in", Find::vars()), + ("limit", Query::variable().map(Limit::Variable).or(Query::natural_number().map(Limit::Fixed))), + ("order", many1(Query::order())), + ("where", Where::clauses()), + ("with", Find::vars()) // Note: no trailing comma allowed! + ); - let p_in_vars = Find::literal_in() - .with(Find::vars().map(FindQueryPart::In)); - - let p_limit = Find::literal_limit() - .with(vector().of_exactly( - Query::variable().map(|v| Limit::Variable(v)) - .or(Query::natural_number().map(|n| Limit::Fixed(n))))) - .map(FindQueryPart::Limit); - - let p_order_clauses = Find::literal_order() - .with(vector().of_exactly(many1(Query::order()).map(FindQueryPart::Order))); - - let p_where_clauses = Find::literal_where() - .with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses))) - .expected(":where clauses"); - - let p_with_vars = Find::literal_with() - .with(Find::vars().map(FindQueryPart::With)); - - (or(map(), keyword_map())) - .of_exactly(many(choice::<[&mut Parser; 6], _>([ - // Ordered by likelihood. - &mut try(p_find_spec), - &mut try(p_where_clauses), - &mut try(p_in_vars), - &mut try(p_limit), - &mut try(p_order_clauses), - &mut try(p_with_vars), - ]))) - .and_then(|parts: Vec| -> std::result::Result> { - let mut find_spec = None; - let mut in_vars = None; - let mut limit = Limit::None; - let mut order_clauses = None; - let mut where_clauses = None; - let mut with_vars = None; - - for part in parts { - match part { - FindQueryPart::FindSpec(x) => find_spec = Some(x), - FindQueryPart::In(x) => in_vars = Some(x), - FindQueryPart::Limit(x) => limit = x, - FindQueryPart::Order(x) => order_clauses = Some(x), - FindQueryPart::WhereClauses(x) => where_clauses = Some(x), - FindQueryPart::With(x) => with_vars = Some(x), - } - } + (or(keyword_map(), vector())) + .of_exactly(find_map) + .and_then(|(find_spec, in_vars, limit, order_clauses, where_clauses, with_vars) | -> std::result::Result> { + let limit = limit.unwrap_or(Limit::None); // Make sure that if we have `:limit ?x`, `?x` appears in `:in`. let in_vars = in_vars.unwrap_or(BTreeSet::default()); @@ -503,9 +449,9 @@ def_parser!(Find, query, FindQuery, { pub fn parse_find_string(string: &str) -> Result { let expr = edn::parse::value(string)?; Find::query() - .parse(expr.into_atom_stream()) + .parse(expr.atom_stream()) .map(|x| x.0) - .map_err(|e| Error::from_kind(ErrorKind::FindParseError(e))) + .map_err(|e| Error::from_kind(ErrorKind::FindParseError(e.into()))) } #[cfg(test)] @@ -594,8 +540,9 @@ mod test { edn::Value::Float(v.clone()), edn::Value::PlainSymbol(tx.clone()))); + let input = input.with_spans(); let mut par = Where::pattern(); - let result = par.parse(input.with_spans().into_atom_stream()); + let result = par.parse(input.atom_stream()); assert!(matches!(result, Err(_)), "Expected a parse error."); } @@ -635,15 +582,16 @@ mod test { let f = edn::PlainSymbol::new("?f"); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), edn::Value::PlainSymbol(f.clone()),]); - assert_parses_to!(Find::vars, input, + assert_parses_to!(|| vector().of_exactly(Find::vars()), input, vec![variable(e.clone()), variable(f.clone())].into_iter().collect()); let g = edn::PlainSymbol::new("?g"); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(g.clone()), edn::Value::PlainSymbol(g.clone()),]); - let mut par = Find::vars(); - let result = par.parse(input.with_spans().into_atom_stream()) + let input = input.with_spans(); + let mut par = vector().of_exactly(Find::vars()); + let result = par.parse(input.atom_stream()) .map(|x| x.0) .map_err(|e| if let Some(combine::primitives::Error::Other(x)) = e.errors.into_iter().next() { // Pattern matching on boxes is rocket science until Rust Nightly features hit @@ -806,23 +754,27 @@ mod test { let zero = edn::Value::Integer(0); let pos = edn::Value::Integer(5); - // This is terrible, but destructuring errors is a shitshow. + // This is terrible, but destructuring errors is frustrating. + let input = text.with_spans(); let mut par = Query::natural_number(); - let x = par.parse(text.with_spans().into_atom_stream()).err().expect("an error").errors; + let x = par.parse(input.atom_stream()).err().expect("an error").errors; let result = format!("{:?}", x); assert_eq!(result, "[Other(Error(InvalidLimit(Text(\"foo\")), State { next_error: None, backtrace: None })), Expected(Borrowed(\"natural_number\"))]"); + let input = neg.with_spans(); let mut par = Query::natural_number(); - let x = par.parse(neg.with_spans().into_atom_stream()).err().expect("an error").errors; + let x = par.parse(input.atom_stream()).err().expect("an error").errors; let result = format!("{:?}", x); assert_eq!(result, "[Other(Error(InvalidLimit(Integer(-10)), State { next_error: None, backtrace: None })), Expected(Borrowed(\"natural_number\"))]"); + let input = zero.with_spans(); let mut par = Query::natural_number(); - let x = par.parse(zero.with_spans().into_atom_stream()).err().expect("an error").errors; + let x = par.parse(input.atom_stream()).err().expect("an error").errors; let result = format!("{:?}", x); assert_eq!(result, "[Other(Error(InvalidLimit(Integer(0)), State { next_error: None, backtrace: None })), Expected(Borrowed(\"natural_number\"))]"); + let input = pos.with_spans(); let mut par = Query::natural_number(); - assert_eq!(None, par.parse(pos.with_spans().into_atom_stream()).err()); + assert_eq!(None, par.parse(input.atom_stream()).err()); } } diff --git a/query/src/lib.rs b/query/src/lib.rs index 1f2a6d3e..f12144bb 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -84,14 +84,14 @@ impl Variable { } pub trait FromValue { - fn from_value(v: edn::ValueAndSpan) -> Option; + fn from_value(v: &edn::ValueAndSpan) -> Option; } /// If the provided EDN value is a PlainSymbol beginning with '?', return /// it wrapped in a Variable. If not, return None. /// TODO: intern strings. #398. impl FromValue for Variable { - fn from_value(v: edn::ValueAndSpan) -> Option { + fn from_value(v: &edn::ValueAndSpan) -> Option { if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { Variable::from_symbol(s) } else { @@ -129,7 +129,7 @@ impl fmt::Debug for Variable { pub struct PredicateFn(pub PlainSymbol); impl FromValue for PredicateFn { - fn from_value(v: edn::ValueAndSpan) -> Option { + fn from_value(v: &edn::ValueAndSpan) -> Option { if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { PredicateFn::from_symbol(s) } else { @@ -162,7 +162,7 @@ pub enum SrcVar { } impl FromValue for SrcVar { - fn from_value(v: edn::ValueAndSpan) -> Option { + fn from_value(v: &edn::ValueAndSpan) -> Option { if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { SrcVar::from_symbol(s) } else { @@ -215,9 +215,9 @@ pub enum FnArg { } impl FromValue for FnArg { - fn from_value(v: edn::ValueAndSpan) -> Option { + fn from_value(v: &edn::ValueAndSpan) -> Option { // TODO: support SrcVars. - Variable::from_value(v.clone()) // TODO: don't clone! + Variable::from_value(v) .and_then(|v| Some(FnArg::Variable(v))) .or_else(|| { println!("from_value {}", v.inner); @@ -266,7 +266,7 @@ impl PatternNonValuePlace { } impl FromValue for PatternNonValuePlace { - fn from_value(v: edn::ValueAndSpan) -> Option { + fn from_value(v: &edn::ValueAndSpan) -> Option { match v.inner { edn::SpannedValue::Integer(x) => if x >= 0 { Some(PatternNonValuePlace::Entid(x)) @@ -308,7 +308,7 @@ pub enum PatternValuePlace { } impl FromValue for PatternValuePlace { - fn from_value(v: edn::ValueAndSpan) -> Option { + fn from_value(v: &edn::ValueAndSpan) -> Option { match v.inner { edn::SpannedValue::Integer(x) => Some(PatternValuePlace::EntidOrInteger(x)), diff --git a/src/conn.rs b/src/conn.rs index 43fb7a0e..abd348ab 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -130,7 +130,7 @@ impl Conn { transaction: &str) -> Result { let assertion_vector = edn::parse::value(transaction)?; - let entities = mentat_tx_parser::Tx::parse(assertion_vector)?; + let entities = mentat_tx_parser::Tx::parse(&assertion_vector)?; let tx = sqlite.transaction()?; diff --git a/tx-parser/Cargo.toml b/tx-parser/Cargo.toml index 1fc8839c..60720ebf 100644 --- a/tx-parser/Cargo.toml +++ b/tx-parser/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] -combine = "2.2.2" +combine = "2.3.2" error-chain = "0.8.1" [dependencies.edn] diff --git a/tx-parser/src/errors.rs b/tx-parser/src/errors.rs index c5f9b1fa..a4abd8fb 100644 --- a/tx-parser/src/errors.rs +++ b/tx-parser/src/errors.rs @@ -10,8 +10,7 @@ #![allow(dead_code)] -use combine; -use mentat_parser_utils::value_and_span::Stream; +use mentat_parser_utils::ValueParseError; error_chain! { types { @@ -19,7 +18,7 @@ error_chain! { } errors { - ParseError(parse_error: combine::ParseError) { + ParseError(parse_error: ValueParseError) { description("error parsing edn values") display("error parsing edn values:\n{}", parse_error) } diff --git a/tx-parser/src/lib.rs b/tx-parser/src/lib.rs index ca162b60..e92ccf35 100644 --- a/tx-parser/src/lib.rs +++ b/tx-parser/src/lib.rs @@ -21,11 +21,13 @@ extern crate mentat_tx; extern crate mentat_parser_utils; use combine::{ + choice, eof, many, parser, satisfy, satisfy_map, + try, Parser, ParseResult, }; @@ -53,12 +55,12 @@ use mentat_parser_utils::value_and_span::{ pub mod errors; pub use errors::*; -pub struct Tx; +pub struct Tx<'a>(std::marker::PhantomData<&'a ()>); def_parser!(Tx, entid, Entid, { integer() .map(|x| Entid::Entid(x)) - .or(namespaced_keyword().map(|x| Entid::Ident(x))) + .or(namespaced_keyword().map(|x| Entid::Ident(x.clone()))) }); def_matches_plain_symbol!(Tx, literal_lookup_ref, "lookup-ref"); @@ -68,7 +70,7 @@ def_parser!(Tx, lookup_ref, LookupRef, { Tx::literal_lookup_ref() .with((Tx::entid(), Tx::atom())) - .map(|(a, v)| LookupRef { a: a, v: v.without_spans() })) + .map(|(a, v)| LookupRef { a: a, v: v.clone().without_spans() })) }); def_parser!(Tx, entid_or_lookup_ref_or_temp_id, EntidOrLookupRefOrTempId, { @@ -78,11 +80,11 @@ def_parser!(Tx, entid_or_lookup_ref_or_temp_id, EntidOrLookupRefOrTempId, { }); def_parser!(Tx, temp_id, TempId, { - satisfy_map(|x: edn::ValueAndSpan| x.into_text().map(TempId::External)) + satisfy_map(|x: &'a edn::ValueAndSpan| x.as_text().cloned().map(TempId::External)) }); -def_parser!(Tx, atom, edn::ValueAndSpan, { - satisfy_map(|x: edn::ValueAndSpan| x.into_atom()) +def_parser!(Tx, atom, &'a edn::ValueAndSpan, { + satisfy_map(|x: &'a edn::ValueAndSpan| x.as_atom()) }); def_parser!(Tx, nested_vector, Vec, { @@ -90,10 +92,12 @@ def_parser!(Tx, nested_vector, Vec, { }); def_parser!(Tx, atom_or_lookup_ref_or_vector, AtomOrLookupRefOrVectorOrMapNotation, { - Tx::lookup_ref().map(AtomOrLookupRefOrVectorOrMapNotation::LookupRef) - .or(Tx::nested_vector().map(AtomOrLookupRefOrVectorOrMapNotation::Vector)) - .or(Tx::map_notation().map(AtomOrLookupRefOrVectorOrMapNotation::MapNotation)) - .or(Tx::atom().map(AtomOrLookupRefOrVectorOrMapNotation::Atom)) + choice::<[&mut Parser; 4], _> + ([&mut try(Tx::lookup_ref().map(AtomOrLookupRefOrVectorOrMapNotation::LookupRef)), + &mut Tx::nested_vector().map(AtomOrLookupRefOrVectorOrMapNotation::Vector), + &mut Tx::map_notation().map(AtomOrLookupRefOrVectorOrMapNotation::MapNotation), + &mut Tx::atom().map(|x| x.clone()).map(AtomOrLookupRefOrVectorOrMapNotation::Atom) + ]) }); def_matches_namespaced_keyword!(Tx, literal_db_add, "db", "add"); @@ -133,21 +137,21 @@ def_parser!(Tx, entities, Vec, { vector().of_exactly(many(Tx::entity())) }); -impl Tx { - pub fn parse(input: edn::ValueAndSpan) -> std::result::Result, errors::Error> { +impl<'a> Tx<'a> { + pub fn parse(input: &'a edn::ValueAndSpan) -> std::result::Result, errors::Error> { Tx::entities() .skip(eof()) - .parse(input.into_atom_stream()) + .parse(input.atom_stream()) .map(|x| x.0) - .map_err(|e| Error::from_kind(ErrorKind::ParseError(e))) + .map_err(|e| Error::from_kind(ErrorKind::ParseError(e.into()))) } fn parse_entid_or_lookup_ref_or_temp_id(input: edn::ValueAndSpan) -> std::result::Result { Tx::entid_or_lookup_ref_or_temp_id() .skip(eof()) - .parse(input.into_atom_stream()) + .parse(input.atom_stream()) .map(|x| x.0) - .map_err(|e| Error::from_kind(ErrorKind::ParseError(e))) + .map_err(|e| Error::from_kind(ErrorKind::ParseError(e.into()))) } } @@ -212,8 +216,11 @@ mod tests { kw("test", "entid"), kw("test", "a"), Value::Text("v".into())]); - let mut parser = Tx::entity(); - let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + + let input = input.with_spans(); + let stream = input.atom_stream(); + let result = Tx::entity().parse(stream).map(|x| x.0); + assert_eq!(result, Ok(Entity::AddOrRetract { op: OpType::Add, @@ -230,8 +237,11 @@ mod tests { Value::Integer(101), kw("test", "a"), Value::Text("v".into())]); - let mut parser = Tx::entity(); - let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + + let input = input.with_spans(); + let stream = input.atom_stream(); + let result = Tx::entity().parse(stream).map(|x| x.0); + assert_eq!(result, Ok(Entity::AddOrRetract { op: OpType::Retract, @@ -249,8 +259,11 @@ mod tests { Value::Text("v1".into())].into_iter().collect()), kw("test", "a"), Value::Text("v".into())]); - let mut parser = Tx::entity(); - let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + + let input = input.with_spans(); + let stream = input.atom_stream(); + let result = Tx::entity().parse(stream).map(|x| x.0); + assert_eq!(result, Ok(Entity::AddOrRetract { op: OpType::Add, @@ -271,8 +284,11 @@ mod tests { Value::Text("v1".into())].into_iter().collect()), kw("test", "a"), Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])]); - let mut parser = Tx::entity(); - let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + + let input = input.with_spans(); + let stream = input.atom_stream(); + let result = Tx::entity().parse(stream).map(|x| x.0); + assert_eq!(result, Ok(Entity::AddOrRetract { op: OpType::Add, @@ -297,8 +313,10 @@ mod tests { map.insert(kw("db", "ident"), kw("test", "attribute")); let input = Value::Map(map.clone()); - let mut parser = Tx::entity(); - let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + let input = input.with_spans(); + let stream = input.atom_stream(); + let result = Tx::entity().parse(stream).map(|x| x.0); + assert_eq!(result, Ok(Entity::MapNotation(expected))); } diff --git a/tx-parser/tests/parser.rs b/tx-parser/tests/parser.rs index 57994e8a..67d60631 100644 --- a/tx-parser/tests/parser.rs +++ b/tx-parser/tests/parser.rs @@ -34,7 +34,7 @@ fn test_float_and_uuid() { "#; let edn = parse::value(input).expect("to parse test input"); - let result = Tx::parse(edn); + let result = Tx::parse(&edn); assert_eq!(result.unwrap(), vec![ Entity::AddOrRetract { @@ -61,7 +61,7 @@ fn test_entities() { let edn = parse::value(input).expect("to parse test input"); - let result = Tx::parse(edn); + let result = Tx::parse(&edn); assert_eq!(result.unwrap(), vec![ Entity::AddOrRetract {