Parse without copying streams. Fixes #436. #444. r=rnewman

We were accidentally quadratic, copying the tails of owned Vec
instances around.  This brings us down to the expected linear runtime.
This commit is contained in:
Nick Alexander 2017-05-18 10:20:06 -07:00
commit 8a8fcedd1c
16 changed files with 758 additions and 495 deletions

View file

@ -61,3 +61,6 @@ path = "query-translator"
[dependencies.mentat_tx_parser]
path = "tx-parser"
[profile.release]
debug = true

View file

@ -251,6 +251,6 @@ pub fn bootstrap_entities() -> Vec<Entity> {
// Failure here is a coding error (since the inputs are fixed), not a runtime error.
// TODO: represent these bootstrap data errors rather than just panicking.
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(bootstrap_assertions.with_spans()).unwrap();
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(&bootstrap_assertions.with_spans()).unwrap();
return bootstrap_entities;
}

View file

@ -1154,7 +1154,7 @@ mod tests {
fn transact<I>(&mut self, transaction: I) -> Result<TxReport> where I: Borrow<str> {
// Failure to parse the transaction is a coding error, so we unwrap.
let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str());
let entities: Vec<_> = mentat_tx_parser::Tx::parse(assertions.clone()).expect(format!("to be able to parse {} into entities", assertions).as_str());
let entities: Vec<_> = mentat_tx_parser::Tx::parse(&assertions).expect(format!("to be able to parse {} into entities", assertions).as_str());
let details = {
// The block scopes the borrow of self.sqlite.

View file

@ -110,9 +110,21 @@ impl ValueAndSpan {
}
}
/// Returns a reference to this value when its inner value reports `is_atom()`, and `None`
/// otherwise.
pub fn as_atom(&self) -> Option<&ValueAndSpan> {
    match self.inner.is_atom() {
        true => Some(self),
        false => None,
    }
}
/// Consumes this value (the span is dropped) and returns the result of `into_text` on the
/// inner value.
pub fn into_text(self) -> Option<String> {
self.inner.into_text()
}
/// Borrowing counterpart of `into_text`: returns the result of `as_text` on the inner value
/// without consuming `self`.
pub fn as_text(&self) -> Option<&String> {
self.inner.as_text()
}
}
impl Value {

View file

@ -5,7 +5,8 @@ authors = ["Victor Porof <vporof@mozilla.com>", "Richard Newman <rnewman@mozilla
workspace = ".."
[dependencies]
combine = "2.2.2"
combine = "2.3.2"
itertools = "0.5.9"
[dependencies.edn]
path = "../edn"

View file

@ -10,139 +10,7 @@
extern crate combine;
extern crate edn;
use combine::{
ParseResult,
};
use combine::combinator::{
Expected,
FnParser,
};
pub mod log;
pub mod value_and_span;
pub use log::{
LogParsing,
};
/// A type definition for a function parser that either parses an `O` from an input stream of type
/// `I`, or fails with an "expected" failure.
/// See <https://docs.rs/combine/2.2.1/combine/trait.Parser.html#method.expected> for more
/// illumination.
/// Nothing about this is specific to the result type of the parser.
pub type ResultParser<O, I> = Expected<FnParser<I, fn(I) -> ParseResult<O, I>>>;
/// `assert_parses_to!` simplifies some of the boilerplate around running a
/// parser function against input and expecting a certain result.
#[macro_export]
macro_rules! assert_parses_to {
( $parser: expr, $input: expr, $expected: expr ) => {{
let par = $parser();
let result = par.skip(eof()).parse($input.with_spans().into_atom_stream()).map(|x| x.0);
assert_eq!(result, Ok($expected));
}}
}
/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function
/// against string input and expecting a certain result.
#[macro_export]
macro_rules! assert_edn_parses_to {
( $parser: expr, $input: expr, $expected: expr ) => {{
let par = $parser();
let input = edn::parse::value($input).expect("to be able to parse input as EDN");
let result = par.skip(eof()).parse(input.into_atom_stream()).map(|x| x.0);
assert_eq!(result, Ok($expected));
}}
}
/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map`
/// body that matches a particular `Value` enum case, otherwise returning `None`.
#[macro_export]
macro_rules! satisfy_unwrap {
( $cas: path, $var: ident, $body: block ) => {
satisfy_map(|x: edn::Value| if let $cas($var) = x $body else { None })
}
}
/// Generate a `satisfy_map` expression that matches a `PlainSymbol`
/// value with the given name.
///
/// We do this rather than using `combine::token` so that we don't
/// need to allocate a new `String` inside a `PlainSymbol` inside a `Value`
/// just to match input.
#[macro_export]
macro_rules! matches_plain_symbol {
($name: expr, $input: ident) => {
satisfy_map(|x: edn::Value| {
if let edn::Value::PlainSymbol(ref s) = x {
if s.0.as_str() == $name {
return Some(());
}
}
return None;
}).parse_stream($input)
}
}
/// Define an `impl` body for the `$parser` type. The body will contain a parser
/// function called `$name`, consuming a stream of `$item_type`s. The parser's
/// result type will be `$result_type`.
///
/// The provided `$body` will be evaluated with `$input` bound to the input stream.
///
/// `$body`, when run, should return a `ParseResult` of the appropriate result type.
#[macro_export]
macro_rules! def_parser_fn {
( $parser: ident, $name: ident, $item_type: ty, $result_type: ty, $input: ident, $body: block ) => {
impl<I> $parser<I> where I: Stream<Item = $item_type> {
fn $name() -> ResultParser<$result_type, I> {
fn inner<I: Stream<Item = $item_type>>($input: I) -> ParseResult<$result_type, I> {
$body
}
parser(inner as fn(I) -> ParseResult<$result_type, I>).expected(stringify!($name))
}
}
}
}
#[macro_export]
macro_rules! def_parser {
( $parser: ident, $name: ident, $result_type: ty, $body: block ) => {
impl $parser {
fn $name() -> ResultParser<$result_type, $crate::value_and_span::Stream> {
fn inner(input: $crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream> {
$body.parse_lazy(input).into()
}
parser(inner as fn($crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream>).expected(stringify!($name))
}
}
}
}
/// `def_value_parser_fn` is a short-cut to `def_parser_fn` with the input type
/// being `edn::Value`.
#[macro_export]
macro_rules! def_value_parser_fn {
( $parser: ident, $name: ident, $result_type: ty, $input: ident, $body: block ) => {
def_parser_fn!($parser, $name, edn::Value, $result_type, $input, $body);
}
}
/// `def_value_satisfy_parser_fn` is a short-cut to `def_parser_fn` with the input type
/// being `edn::Value` and the body being a call to `satisfy_map` with the given transformer.
///
/// In practice this allows you to simply pass a function that accepts an `&edn::Value` and
/// returns an `Option<$result_type>`: if a suitable value is at the front of the stream,
/// it will be converted and returned by the parser; otherwise, the parse will fail.
#[macro_export]
macro_rules! def_value_satisfy_parser_fn {
( $parser: ident, $name: ident, $result_type: ty, $transformer: path ) => {
def_value_parser_fn!($parser, $name, $result_type, input, {
satisfy_map(|x: edn::Value| $transformer(&x)).parse_stream(input)
});
}
}
extern crate itertools;
/// A `ValueParseError` is a `combine::primitives::ParseError`-alike that implements the `Debug`,
/// `Display`, and `std::error::Error` traits. In addition, it doesn't capture references, making
@ -152,11 +20,29 @@ macro_rules! def_value_satisfy_parser_fn {
/// `Display`; rather than introducing a newtype like `DisplayVec`, we re-use `edn::Value::Vector`.
#[derive(PartialEq)]
pub struct ValueParseError {
pub position: usize,
pub position: edn::Span,
// Think of this as `Vec<Error<edn::Value, DisplayVec<edn::Value>>>`; see above.
pub errors: Vec<combine::primitives::Error<edn::Value, edn::Value>>,
pub errors: Vec<combine::primitives::Error<edn::ValueAndSpan, edn::ValueAndSpan>>,
}
#[macro_use]
pub mod macros;
pub use macros::{
KeywordMapParser,
ResultParser,
};
pub mod log;
pub mod value_and_span;
pub use value_and_span::{
Stream,
};
pub use log::{
LogParsing,
};
impl std::fmt::Debug for ValueParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f,
@ -168,7 +54,7 @@ impl std::fmt::Debug for ValueParseError {
impl std::fmt::Display for ValueParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
try!(writeln!(f, "Parse error at {}", self.position));
try!(writeln!(f, "Parse error at {:?}", self.position));
combine::primitives::Error::fmt_errors(&self.errors, f)
}
}
@ -179,49 +65,13 @@ impl std::error::Error for ValueParseError {
}
}
impl<'a> From<combine::primitives::ParseError<&'a [edn::Value]>> for ValueParseError {
fn from(e: combine::primitives::ParseError<&'a [edn::Value]>) -> ValueParseError {
impl<'a> From<combine::primitives::ParseError<Stream<'a>>> for ValueParseError {
fn from(e: combine::primitives::ParseError<Stream<'a>>) -> ValueParseError {
ValueParseError {
position: e.position,
errors: e.errors.into_iter().map(|e| e.map_range(|r| {
let mut v = Vec::new();
v.extend_from_slice(r);
edn::Value::Vector(v)
})).collect(),
}
}
}
/// Allow to map the range types of combine::primitives::{Info, Error}.
trait MapRange<R, S> {
type Output;
fn map_range<F>(self, f: F) -> Self::Output where F: FnOnce(R) -> S;
}
impl<T, R, S> MapRange<R, S> for combine::primitives::Info<T, R> {
type Output = combine::primitives::Info<T, S>;
fn map_range<F>(self, f: F) -> combine::primitives::Info<T, S> where F: FnOnce(R) -> S {
use combine::primitives::Info::*;
match self {
Token(t) => Token(t),
Range(r) => Range(f(r)),
Owned(s) => Owned(s),
Borrowed(x) => Borrowed(x),
}
}
}
impl<T, R, S> MapRange<R, S> for combine::primitives::Error<T, R> {
type Output = combine::primitives::Error<T, S>;
fn map_range<F>(self, f: F) -> combine::primitives::Error<T, S> where F: FnOnce(R) -> S {
use combine::primitives::Error::*;
match self {
Unexpected(x) => Unexpected(x.map_range(f)),
Expected(x) => Expected(x.map_range(f)),
Message(x) => Message(x.map_range(f)),
Other(x) => Other(x),
position: e.position.0,
errors: e.errors.into_iter()
.map(|e| e.map_token(|t| t.clone()).map_range(|r| r.clone()))
.collect(),
}
}
}

137
parser-utils/src/macros.rs Normal file
View file

@ -0,0 +1,137 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// use combine::{
// ParseResult,
// };
// use combine::combinator::{
// Expected,
// FnParser,
// };
use combine::{
ParseResult,
};
// use combine::primitives; // To not shadow Error.
// use combine::primitives::{
// Consumed,
// FastResult,
// };
use combine::combinator::{
Expected,
FnParser,
};
/// A type definition for a function parser that either parses an `O` from an input stream of type
/// `I`, or fails with an "expected" failure.
/// See <https://docs.rs/combine/2.2.1/combine/trait.Parser.html#method.expected> for more
/// illumination.
/// Nothing about this is specific to the result type of the parser.
///
/// Concretely, this is an `Expected` wrapper around an `FnParser` whose function is the plain
/// function pointer `fn(I) -> ParseResult<O, I>`.
pub type ResultParser<O, I> = Expected<FnParser<I, fn(I) -> ParseResult<O, I>>>;
/// A newtype wrapper around `T`. The `keyword_map_of!` macro in this file wraps a tuple of
/// `(keyword, value)` pairs in it.
pub struct KeywordMapParser<T>(pub T);
/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map`
/// body that matches a particular `Value` enum case, otherwise returning `None`.
///
/// `$cas` is the path of a single-field `edn::Value` variant, `$var` is the identifier bound to
/// that field, and `$body` is the block evaluated when the variant matches. Since the
/// non-matching branch evaluates to `None`, `$body` must evaluate to an `Option` as well.
///
/// `satisfy_map` and `edn` are named unqualified, so both must be in scope where the macro is
/// expanded.
#[macro_export]
macro_rules! satisfy_unwrap {
( $cas: path, $var: ident, $body: block ) => {
satisfy_map(|x: edn::Value| if let $cas($var) = x $body else { None })
}
}
/// Expand to a `satisfy_map` parser, immediately run against `$input` with `parse_stream`, that
/// accepts an `edn::Value::PlainSymbol` whose name equals `$name` and yields `()`.
///
/// Comparing the symbol's string in place (instead of going through `combine::token`) avoids
/// allocating a `String` inside a `PlainSymbol` inside a `Value` just to match input.
#[macro_export]
macro_rules! matches_plain_symbol {
    ($name: expr, $input: ident) => {
        satisfy_map(|x: edn::Value| {
            match x {
                // Only a plain symbol with exactly the requested name is accepted.
                edn::Value::PlainSymbol(ref s) if s.0.as_str() == $name => Some(()),
                _ => None,
            }
        }).parse_stream($input)
    }
}
/// Define an `impl<'p> $parser<'p>` block containing a parser function called `$name`, which
/// consumes a `$crate::value_and_span::Stream<'a>` and produces a `$result_type`.
///
/// `$body` must be a block evaluating to a parser over that stream. It is run with `parse_lazy`
/// against the input and its result is converted into a `ParseResult`. The returned parser is
/// labelled with `stringify!($name)` via `expected`.
///
/// `$parser` must be a type taking exactly one lifetime parameter. `$result_type` may mention the
/// stream lifetime `'a` (the `def_matches_*` macros pass `&'a edn::ValueAndSpan`). `parser`,
/// `ParseResult` and `ResultParser` are named unqualified and must be in scope at the expansion
/// site.
#[macro_export]
macro_rules! def_parser {
( $parser: ident, $name: ident, $result_type: ty, $body: block ) => {
impl<'p> $parser<'p> {
fn $name<'a>() -> ResultParser<$result_type, $crate::value_and_span::Stream<'a>> {
fn inner<'a>(input: $crate::value_and_span::Stream<'a>) -> ParseResult<$result_type, $crate::value_and_span::Stream<'a>> {
$body.parse_lazy(input).into()
}
parser(inner as fn($crate::value_and_span::Stream<'a>) -> ParseResult<$result_type, $crate::value_and_span::Stream<'a>>).expected(stringify!($name))
}
}
}
}
/// `assert_parses_to!` runs the parser returned by `$parser()` over `$input` and asserts that it
/// produces `$expected`.
///
/// `$input` is first given spans with `with_spans()`, then presented to the parser as a
/// single-item atom stream. The parser must consume that stream completely (`skip(eof())`).
#[macro_export]
macro_rules! assert_parses_to {
    ( $parser: expr, $input: expr, $expected: expr ) => {{
        // The spanned value must outlive the stream borrowed from it.
        let spanned = $input.with_spans();
        let outcome = $parser()
            .skip(eof())
            .parse(spanned.atom_stream())
            .map(|(value, _rest)| value);
        assert_eq!(outcome, Ok($expected));
    }}
}
/// `assert_edn_parses_to!` parses the string `$input` as EDN, runs the parser returned by
/// `$parser()` over the result, and asserts that it produces `$expected`.
///
/// Panics if `$input` is not valid EDN. The parser must consume the whole atom stream
/// (`skip(eof())`).
#[macro_export]
macro_rules! assert_edn_parses_to {
    ( $parser: expr, $input: expr, $expected: expr ) => {{
        // The parsed value must outlive the stream borrowed from it.
        let parsed = edn::parse::value($input).expect("to be able to parse input as EDN");
        let outcome = $parser()
            .skip(eof())
            .parse(parsed.atom_stream())
            .map(|(value, _rest)| value);
        assert_eq!(outcome, Ok($expected));
    }}
}
/// `assert_parse_failure_contains!` simplifies running a parser function against string input and
/// expecting a certain failure. This is working around the complexity of pattern matching parse
/// errors that contain spans.
///
/// `$input` is parsed as EDN (panicking if that fails), the parser returned by `$parser()` is run
/// over it, and any parse error is converted into a `ValueParseError`. The assertion passes when
/// the `Debug` rendering of the whole result contains the string `$expected`.
#[macro_export]
macro_rules! assert_parse_failure_contains {
    ( $parser: expr, $input: expr, $expected: expr ) => {{
        let input = edn::parse::value($input).expect("to be able to parse input as EDN");
        let par = $parser();
        let stream = input.atom_stream();
        // Name the error type through `$crate` so this exported macro resolves it in the defining
        // crate, not in whichever crate happens to invoke the macro.
        let result = par.skip(eof()).parse(stream).map(|x| x.0).map_err(|e| -> $crate::ValueParseError { e.into() });
        assert!(format!("{:?}", result).contains($expected), "Expected {:?} to contain {:?}", result, $expected);
    }}
}
/// Build a `KeywordMapParser` from one or more `(keyword, value)` pairs.
///
/// The pairs are wrapped, in the order given, as a tuple inside `KeywordMapParser`. With a single
/// pair the "tuple" is just that pair.
///
/// # Panics
///
/// Panics at runtime with "keyword map has repeated key: ..." if the same keyword is supplied more
/// than once.
#[macro_export]
macro_rules! keyword_map_of {
    ($(($keyword:expr, $value:expr)),+) => {{
        // Use an absolute path: a relative `std::...` path would only resolve where `std` happens
        // to be in scope at the expansion site.
        let mut seen = ::std::collections::BTreeSet::default();
        $(
            if !seen.insert($keyword) {
                panic!("keyword map has repeated key: {}", stringify!($keyword));
            }
        )+
        KeywordMapParser(($(($keyword, $value)),+))
    }}
}

View file

@ -8,13 +8,15 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#![allow(dead_code)]
use std;
use std::cmp::Ordering;
use std::fmt::{
Debug,
Display,
Formatter,
};
use std::cmp::Ordering;
use combine::{
ConsumedResult,
@ -22,15 +24,11 @@ use combine::{
Parser,
ParseResult,
StreamOnce,
many,
many1,
parser,
satisfy,
satisfy_map,
};
use combine::primitives; // To not shadow Error.
use combine::primitives::{
Consumed,
FastResult,
};
use combine::combinator::{
@ -40,9 +38,13 @@ use combine::combinator::{
use edn;
use macros::{
KeywordMapParser,
};
/// A wrapper to let us order `edn::Span` in whatever way is appropriate for parsing with `combine`.
#[derive(Clone, Copy, Debug)]
pub struct SpanPosition(edn::Span);
pub struct SpanPosition(pub edn::Span);
impl Display for SpanPosition {
fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
@ -76,29 +78,37 @@ impl Ord for SpanPosition {
/// yielding `ValueAndSpan` items, which allows us to yield uniform `combine::ParseError` types from
/// disparate parsers.
#[derive(Clone)]
pub enum IntoIter {
Empty(std::iter::Empty<edn::ValueAndSpan>),
Atom(std::iter::Once<edn::ValueAndSpan>),
Vector(std::vec::IntoIter<edn::ValueAndSpan>),
List(std::collections::linked_list::IntoIter<edn::ValueAndSpan>),
/// Iterates via a single `flat_map` [k1, v1, k2, v2, ...].
Map(std::vec::IntoIter<edn::ValueAndSpan>),
pub enum Iter<'a> {
Empty,
Atom(std::iter::Once<&'a edn::ValueAndSpan>),
Vector(std::slice::Iter<'a, edn::ValueAndSpan>),
List(std::collections::linked_list::Iter<'a, edn::ValueAndSpan>),
/// Iterates a map {:k1 v1, :k2 v2, ...} as a single `flat_map` slice [k1, v1, k2, v2, ...].
Map(std::iter::FlatMap<std::collections::btree_map::Iter<'a, edn::ValueAndSpan, edn::ValueAndSpan>,
std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, std::iter::Once<&'a edn::ValueAndSpan>>,
fn((&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, std::iter::Once<&'a edn::ValueAndSpan>>>),
/// Iterates a map with vector values {:k1 [v11 v12 ...], :k2 [v21 v22 ...], ...} as a single
/// flattened map [k1, v11, v12, ..., k2, v21, v22, ...].
KeywordMap(std::iter::FlatMap<std::collections::btree_map::Iter<'a, edn::ValueAndSpan, edn::ValueAndSpan>,
std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, Box<Iter<'a>>>,
fn((&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, Box<Iter<'a>>>>),
// TODO: Support Set and Map more naturally. This is significantly more work because the
// existing BTreeSet and BTreeMap iterators do not implement Clone, and implementing Clone for
// them is involved. Since we don't really need to parse sets and maps at this time, this will
// do for now.
}
impl Iterator for IntoIter {
type Item = edn::ValueAndSpan;
impl<'a> Iterator for Iter<'a> {
type Item = &'a edn::ValueAndSpan;
fn next(&mut self) -> Option<Self::Item> {
match *self {
IntoIter::Empty(ref mut i) => i.next(),
IntoIter::Atom(ref mut i) => i.next(),
IntoIter::Vector(ref mut i) => i.next(),
IntoIter::List(ref mut i) => i.next(),
IntoIter::Map(ref mut i) => i.next(),
Iter::Empty => None,
Iter::Atom(ref mut i) => i.next(),
Iter::Vector(ref mut i) => i.next(),
Iter::List(ref mut i) => i.next(),
Iter::Map(ref mut i) => i.next(),
Iter::KeywordMap(ref mut i) => i.next(),
}
}
}
@ -107,11 +117,11 @@ impl Iterator for IntoIter {
/// to `combine::IteratorStream` as produced by `combine::from_iter`, but specialized to
/// `edn::ValueAndSpan`.
#[derive(Clone)]
pub struct Stream(IntoIter, SpanPosition);
pub struct Stream<'a>(Iter<'a>, SpanPosition);
/// Things specific to parsing with `combine` and our `Stream` that need a trait to live outside of
/// the `edn` crate.
pub trait Item: Clone + PartialEq + Sized {
pub trait Item<'a>: Clone + PartialEq + Sized {
/// Position could be specialized to `SpanPosition`.
type Position: Clone + Ord + std::fmt::Display;
@ -120,13 +130,16 @@ pub trait Item: Clone + PartialEq + Sized {
fn start(&self) -> Self::Position;
fn update_position(&self, &mut Self::Position);
fn into_child_stream_iter(self) -> IntoIter;
fn into_child_stream(self) -> Stream;
fn into_atom_stream_iter(self) -> IntoIter;
fn into_atom_stream(self) -> Stream;
fn child_iter(&'a self) -> Iter<'a>;
fn child_stream(&'a self) -> Stream<'a>;
fn atom_iter(&'a self) -> Iter<'a>;
fn atom_stream(&'a self) -> Stream<'a>;
fn keyword_map_iter(&'a self) -> Iter<'a>;
fn keyword_map_stream(&'a self) -> Stream<'a>;
}
impl Item for edn::ValueAndSpan {
impl<'a> Item<'a> for edn::ValueAndSpan {
type Position = SpanPosition;
fn start(&self) -> Self::Position {
@ -137,28 +150,48 @@ impl Item for edn::ValueAndSpan {
*position = SpanPosition(self.span.clone())
}
fn into_child_stream_iter(self) -> IntoIter {
match self.inner {
edn::SpannedValue::Vector(values) => IntoIter::Vector(values.into_iter()),
edn::SpannedValue::List(values) => IntoIter::List(values.into_iter()),
// Parsing pairs with `combine` is tricky; parsing sequences is easy.
edn::SpannedValue::Map(map) => IntoIter::Map(map.into_iter().flat_map(|(a, v)| std::iter::once(a).chain(std::iter::once(v))).collect::<Vec<_>>().into_iter()),
_ => IntoIter::Empty(std::iter::empty()),
/// Iterate a map with vector values `{:k1 [v11 v12 ...], :k2 [v21 v22 ...], ...}` as a single
/// flattened sequence `[k1, v11, v12, ..., k2, v21, v22, ...]`.
///
/// Yields nothing when this value is not a map. The shape of the map's values is not checked here.
fn keyword_map_iter(&'a self) -> Iter<'a> {
// Flatten one `(k, v)` entry into `k` followed by the children of `v`. This is a named function
// rather than a closure because `Iter::KeywordMap` spells out the mapper as a `fn` pointer type.
// The child iterator is boxed because it is itself an `Iter` nested inside `Iter`.
fn flatten_k_vector<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, Box<Iter<'a>>> {
std::iter::once(k).chain(Box::new(v.child_iter()))
}
match self.inner.as_map() {
Some(ref map) => Iter::KeywordMap(map.iter().flat_map(flatten_k_vector)),
None => Iter::Empty
}
}
fn into_child_stream(self) -> Stream {
fn keyword_map_stream(&'a self) -> Stream<'a> {
let span = self.span.clone();
Stream(self.into_child_stream_iter(), SpanPosition(span))
Stream(self.keyword_map_iter(), SpanPosition(span))
}
fn into_atom_stream_iter(self) -> IntoIter {
IntoIter::Atom(std::iter::once(self))
/// Iterate the direct children of this value by reference.
///
/// Vectors and lists yield their elements. A map `{:k1 v1, :k2 v2, ...}` yields the flattened
/// sequence `[k1, v1, k2, v2, ...]`. Every other kind of value yields nothing.
fn child_iter(&'a self) -> Iter<'a> {
// Flatten one `(k, v)` entry into the two-item sequence `k, v`. This is a named function rather
// than a closure because `Iter::Map` spells out the mapper as a `fn` pointer type.
fn flatten_k_v<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, std::iter::Once<&'a edn::ValueAndSpan>> {
std::iter::once(k).chain(std::iter::once(v))
}
match self.inner {
edn::SpannedValue::Vector(ref values) => Iter::Vector(values.iter()),
edn::SpannedValue::List(ref values) => Iter::List(values.iter()),
// Parsing pairs with `combine` is tricky; parsing sequences is easy.
edn::SpannedValue::Map(ref map) => Iter::Map(map.iter().flat_map(flatten_k_v)),
_ => Iter::Empty,
}
}
fn into_atom_stream(self) -> Stream {
fn child_stream(&'a self) -> Stream<'a> {
let span = self.span.clone();
Stream(self.into_atom_stream_iter(), SpanPosition(span))
Stream(self.child_iter(), SpanPosition(span))
}
/// Iterate over exactly one item: a reference to this value itself.
fn atom_iter(&'a self) -> Iter<'a> {
Iter::Atom(std::iter::once(self))
}
/// Wrap `atom_iter()` in a `Stream` whose initial position is this value's own span.
fn atom_stream(&'a self) -> Stream<'a> {
    let start = SpanPosition(self.span.clone());
    Stream(self.atom_iter(), start)
}
}
@ -174,9 +207,26 @@ impl Item for edn::ValueAndSpan {
#[derive(Clone)]
pub struct OfExactly<P, N>(P, N);
impl<P, N, O> Parser for OfExactly<P, N>
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>,
N: Parser<Input=Stream, Output=O>,
/// Conversion of a parser's output into a `Stream<'a>`, so that a nested parser can be run over
/// it. `OfExactly` calls `as_stream` on the outer parser's output to obtain the inner parser's
/// input.
pub trait Streaming<'a> {
/// Consume `self` and produce the stream to parse.
fn as_stream(self) -> Stream<'a>;
}
/// A borrowed value is streamed as its children (see `child_stream`).
impl<'a> Streaming<'a> for &'a edn::ValueAndSpan {
fn as_stream(self) -> Stream<'a> {
self.child_stream()
}
}
/// A `Stream` is already a stream and is returned unchanged.
impl<'a> Streaming<'a> for Stream<'a> {
fn as_stream(self) -> Stream<'a> {
self
}
}
impl<'a, P, N, M, O> Parser for OfExactly<P, N>
where P: Parser<Input=Stream<'a>, Output=M>,
N: Parser<Input=Stream<'a>, Output=O>,
M: 'a + Streaming<'a>,
{
type Input = P::Input;
type Output = O;
@ -186,7 +236,7 @@ impl<P, N, O> Parser for OfExactly<P, N>
match self.0.parse_lazy(input) {
ConsumedOk((outer_value, outer_input)) => {
match self.1.parse_lazy(outer_value.into_child_stream()) {
match self.1.parse_lazy(outer_value.as_stream()) {
ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => {
match inner_input.uncons() {
Err(ref err) if *err == primitives::Error::end_of_input() => ConsumedOk((inner_value, outer_input)),
@ -200,7 +250,7 @@ impl<P, N, O> Parser for OfExactly<P, N>
}
},
EmptyOk((outer_value, outer_input)) => {
match self.1.parse_lazy(outer_value.into_child_stream()) {
match self.1.parse_lazy(outer_value.as_stream()) {
ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => {
match inner_input.uncons() {
Err(ref err) if *err == primitives::Error::end_of_input() => EmptyOk((inner_value, outer_input)),
@ -222,9 +272,10 @@ impl<P, N, O> Parser for OfExactly<P, N>
}
#[inline(always)]
pub fn of_exactly<P, N, O>(p: P, n: N) -> OfExactly<P, N>
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>,
N: Parser<Input=Stream, Output=O>,
pub fn of_exactly<'a, P, N, M, O>(p: P, n: N) -> OfExactly<P, N>
where P: Parser<Input=Stream<'a>, Output=M>,
N: Parser<Input=Stream<'a>, Output=O>,
M: 'a + Streaming<'a>,
{
OfExactly(p, n)
}
@ -236,8 +287,9 @@ pub trait OfExactlyParsing: Parser + Sized {
N: Parser<Input = Self::Input, Output=O>;
}
impl<P> OfExactlyParsing for P
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>
impl<'a, P, M> OfExactlyParsing for P
where P: Parser<Input=Stream<'a>, Output=M>,
M: 'a + Streaming<'a>,
{
fn of_exactly<N, O>(self, n: N) -> OfExactly<P, N>
where N: Parser<Input = Self::Input, Output=O>
@ -247,10 +299,10 @@ impl<P> OfExactlyParsing for P
}
/// Equivalent to `combine::IteratorStream`.
impl StreamOnce for Stream
impl<'a> StreamOnce for Stream<'a>
{
type Item = edn::ValueAndSpan;
type Range = edn::ValueAndSpan;
type Item = &'a edn::ValueAndSpan;
type Range = &'a edn::ValueAndSpan;
type Position = SpanPosition;
#[inline]
@ -272,84 +324,132 @@ impl StreamOnce for Stream
/// Shorthands, just enough to convert the `mentat_db` crate for now. Written using `Box` for now:
/// it's simple and we can address allocation issues if and when they surface.
pub fn vector() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_vector()).boxed()
}
pub fn list() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_list()).boxed()
}
pub fn map() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_map()).boxed()
}
pub fn seq() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_list() || v.inner.is_vector()).boxed()
}
pub fn integer() -> Box<Parser<Input=Stream, Output=i64>> {
satisfy_map(|v: edn::ValueAndSpan| v.inner.as_integer()).boxed()
}
pub fn namespaced_keyword() -> Box<Parser<Input=Stream, Output=edn::NamespacedKeyword>> {
satisfy_map(|v: edn::ValueAndSpan| v.inner.as_namespaced_keyword().cloned()).boxed()
}
/// Like `combine::token()`, but compare an `edn::Value` to an `edn::ValueAndSpan`.
pub fn value(value: edn::Value) -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
// TODO: make this comparison faster. Right now, we drop all the spans; if we walked the value
// trees together, we could avoid creating garbage.
satisfy(move |v: edn::ValueAndSpan| value == v.inner.into()).boxed()
}
fn keyword_map_(input: Stream) -> ParseResult<edn::ValueAndSpan, Stream>
{
// One run is a keyword followed by one or more non-keywords.
let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()),
many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword()))
.map(|vs: Vec<edn::ValueAndSpan>| {
// TODO: extract "spanning".
let beg = vs.first().unwrap().span.0;
let end = vs.last().unwrap().span.1;
edn::ValueAndSpan {
inner: edn::SpannedValue::Vector(vs),
span: edn::Span(beg, end),
}
}));
let mut runs = vector().of_exactly(many::<Vec<_>, _>(run));
let (data, input) = try!(runs.parse_lazy(input).into());
let mut m: std::collections::BTreeMap<edn::ValueAndSpan, edn::ValueAndSpan> = std::collections::BTreeMap::default();
for (k, vs) in data {
if m.insert(k, vs).is_some() {
// TODO: improve this message.
return Err(Consumed::Empty(ParseError::from_errors(input.into_inner().position(), Vec::new())))
pub fn vector_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| {
if v.inner.is_vector() {
Some(v.child_stream())
} else {
None
}
}
let map = edn::ValueAndSpan {
inner: edn::SpannedValue::Map(m),
span: edn::Span(0, 0), // TODO: fix this.
};
Ok((map, input))
})
.parse_lazy(input)
.into()
}
/// Turn a vector of keywords and non-keyword values into a map. As an example, turn
/// ```edn
/// [:keyword1 value1 value2 ... :keyword2 value3 value4 ...]
/// ```
/// into
/// ```edn
/// {:keyword1 [value1 value2 ...] :keyword2 [value3 value4 ...]}
/// ```.
pub fn keyword_map() -> Expected<FnParser<Stream, fn(Stream) -> ParseResult<edn::ValueAndSpan, Stream>>>
{
// The `as` work arounds https://github.com/rust-lang/rust/issues/20178.
parser(keyword_map_ as fn(Stream) -> ParseResult<edn::ValueAndSpan, Stream>).expected("keyword map")
/// Build the vector parser: `vector_` wrapped as a function parser and labelled "vector" for
/// "expected" error reporting.
pub fn vector<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> = vector_;
    parser(parse_fn).expected("vector")
}
/// Parse one item from `input`, accepting it only when its inner value reports `is_list()`.
/// The output is a stream over the accepted item's children.
pub fn list_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
    let mut list_children = satisfy_map(|item: &'a edn::ValueAndSpan| {
        match item.inner.is_list() {
            true => Some(item.child_stream()),
            false => None,
        }
    });
    list_children.parse_lazy(input).into()
}
/// Build the list parser: `list_` wrapped as a function parser and labelled "list" for
/// "expected" error reporting.
pub fn list<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> = list_;
    parser(parse_fn).expected("list")
}
/// Parse one item from `input`, accepting it only when its inner value is a list or a vector.
/// The output is a stream over the accepted item's children.
pub fn seq_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
    let mut seq_children = satisfy_map(|item: &'a edn::ValueAndSpan| {
        let is_sequence = item.inner.is_list() || item.inner.is_vector();
        if !is_sequence {
            return None;
        }
        Some(item.child_stream())
    });
    seq_children.parse_lazy(input).into()
}
/// Build the sequence parser: `seq_` wrapped as a function parser and labelled "vector|list" for
/// "expected" error reporting.
pub fn seq<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> = seq_;
    parser(parse_fn).expected("vector|list")
}
/// Parse one item from `input`, accepting it only when its inner value reports `is_map()`.
/// The output is a stream over the accepted item's children.
pub fn map_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
    let mut map_children = satisfy_map(|item: &'a edn::ValueAndSpan| {
        match item.inner.is_map() {
            true => Some(item.child_stream()),
            false => None,
        }
    });
    map_children.parse_lazy(input).into()
}
/// Build the map parser: `map_` wrapped as a function parser and labelled "map" for "expected"
/// error reporting.
pub fn map<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> = map_;
    parser(parse_fn).expected("map")
}
/// A `[k v]` pair in the map form of a keyword map must have the shape `[:k, [v1, v2, ...]]`, with
/// none of `v1`, `v2`, ... a keyword: without loss of generality, we cannot represent the case
/// where `vn` is a keyword `:l`, since `[:k v1 v2 ... :l]`, isn't a valid keyword map in vector
/// form. This function tests that a `[k v]` pair obeys these constraints.
///
/// If we didn't test this, then we might flatten a map `[:k [:l]] to `[:k :l]`, which isn't a valid
/// keyword map in vector form.
pub fn is_valid_keyword_map_k_v<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> bool {
if !k.inner.is_keyword() {
return false;
}
match v.inner.as_vector() {
None => {
return false;
},
Some(ref vs) => {
if !vs.iter().all(|vv| !vv.inner.is_keyword()) {
return false;
}
},
}
return true;
}
/// Parse one item from `input`, accepting it only when it is a map whose every entry satisfies
/// `is_valid_keyword_map_k_v`. The output is the accepted map's `keyword_map_stream()`, i.e. its
/// entries flattened to `[k1, v11, v12, ..., k2, v21, ...]`.
///
/// Items that are not maps, and maps with an invalid entry, are rejected.
pub fn keyword_map_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
    satisfy_map(|v: &'a edn::ValueAndSpan| {
        v.inner.as_map().and_then(|map| {
            // No diagnostics are printed here: this is library code, and a rejection simply means
            // the caller's alternatives get tried.
            if map.iter().all(is_valid_keyword_map_k_v) {
                Some(v.keyword_map_stream())
            } else {
                None
            }
        })
    })
    .parse_lazy(input)
    .into()
}
/// Build the keyword-map parser: `keyword_map_` wrapped as a function parser and labelled
/// "keyword map" for "expected" error reporting.
pub fn keyword_map<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> = keyword_map_;
    parser(parse_fn).expected("keyword map")
}
/// Parse one item from `input`, accepting it only when `as_integer()` on its inner value yields
/// a value. The output is that `i64`.
pub fn integer_<'a>(input: Stream<'a>) -> ParseResult<i64, Stream<'a>> {
    let mut integer_item = satisfy_map(|item: &'a edn::ValueAndSpan| item.inner.as_integer());
    integer_item.parse_lazy(input).into()
}
/// Build the integer parser: `integer_` wrapped as a function parser and labelled "integer" for
/// "expected" error reporting.
pub fn integer<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<i64, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<i64, Stream<'a>> = integer_;
    parser(parse_fn).expected("integer")
}
/// Parse one item from `input`, accepting it only when `as_namespaced_keyword()` on its inner
/// value yields a keyword. The output borrows that keyword from the input; nothing is cloned.
pub fn namespaced_keyword_<'a>(input: Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>> {
    let mut keyword_item = satisfy_map(|item: &'a edn::ValueAndSpan| item.inner.as_namespaced_keyword());
    keyword_item.parse_lazy(input).into()
}
/// Build the namespaced-keyword parser: `namespaced_keyword_` wrapped as a function parser and
/// labelled "namespaced_keyword" for "expected" error reporting.
pub fn namespaced_keyword<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>>>> {
    // Coerce to a plain function pointer so the parser type matches the signature.
    let parse_fn: fn(Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>> = namespaced_keyword_;
    parser(parse_fn).expected("namespaced_keyword")
}
/// Generate a `satisfy` expression that matches a `PlainSymbol` value with the given name.
@ -359,8 +459,8 @@ pub fn keyword_map() -> Expected<FnParser<Stream, fn(Stream) -> ParseResult<edn:
#[macro_export]
macro_rules! def_matches_plain_symbol {
( $parser: ident, $name: ident, $input: expr ) => {
def_parser!($parser, $name, edn::ValueAndSpan, {
satisfy(|v: edn::ValueAndSpan| {
def_parser!($parser, $name, &'a edn::ValueAndSpan, {
satisfy(|v: &'a edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::PlainSymbol(ref s) => s.0.as_str() == $input,
_ => false,
@ -376,8 +476,8 @@ macro_rules! def_matches_plain_symbol {
#[macro_export]
macro_rules! def_matches_keyword {
( $parser: ident, $name: ident, $input: expr ) => {
def_parser!($parser, $name, edn::ValueAndSpan, {
satisfy(|v: edn::ValueAndSpan| {
def_parser!($parser, $name, &'a edn::ValueAndSpan, {
satisfy(|v: &'a edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::Keyword(ref s) => s.0.as_str() == $input,
_ => false,
@ -394,8 +494,8 @@ macro_rules! def_matches_keyword {
#[macro_export]
macro_rules! def_matches_namespaced_keyword {
( $parser: ident, $name: ident, $input_namespace: expr, $input_name: expr ) => {
def_parser!($parser, $name, edn::ValueAndSpan, {
satisfy(|v: edn::ValueAndSpan| {
def_parser!($parser, $name, &'a edn::ValueAndSpan, {
satisfy(|v: &'a edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::NamespacedKeyword(ref s) => s.namespace.as_str() == $input_namespace && s.name.as_str() == $input_name,
_ => false,
@ -405,57 +505,248 @@ macro_rules! def_matches_namespaced_keyword {
}
}
use combine::primitives::{
Error,
Info,
};
use combine::primitives::FastResult::*;
/// Compare to `tuple_parser!` in `combine`.
///
/// This uses edge cases in Rust's hygienic macro system to represent arbitrary values. That is,
/// `$value: ident` represents both a type in the tuple parameterizing `KeywordMapParser` (since
/// `(A, B, C)` is a valid type declaration) and also a variable value extracted from the underlying
/// instance value. `$tmp: ident` represents an optional value to return.
///
/// This unrolls the cases. Each loop iteration reads a token. It then unrolls the known cases,
/// checking if any case matches the keyword string. If yes, we parse further. If no, we move on
/// to the next case. If no case matches, we fail.
///
/// The generated `Parser` impl outputs a tuple with one `Option` per `(keyword, parser)` pair, in
/// declaration order: `Some(output)` when that keyword was seen and its parser succeeded, `None`
/// when the keyword never appeared. A keyword that appears twice is a parse error, as is a token
/// that matches none of the declared keywords.
macro_rules! keyword_map_parser {
($(($keyword:ident, $value:ident, $tmp:ident)),+) => {
impl <'a, $($value:),+> Parser for KeywordMapParser<($((&'static str, $value)),+)>
where $($value: Parser<Input=Stream<'a>>),+
{
type Input = Stream<'a>;
type Output = ($(Option<$value::Output>),+);
#[allow(non_snake_case)]
fn parse_lazy(&mut self,
mut input: Stream<'a>)
-> ConsumedResult<($(Option<$value::Output>),+), Stream<'a>> {
// Borrow each keyword string and (mutably) each value parser out of the wrapped tuple.
let ($((ref $keyword, ref mut $value)),+) = (*self).0;
// Becomes true once any keyword has matched; selects Consumed* over Empty* results below.
let mut consumed = false;
// One output slot per declared keyword, filled in when that keyword's parser succeeds.
$(
let mut $tmp = None;
)+
loop {
// `uncons` is called on the mutable `input`, so the value parser below is handed a stream
// positioned after the keyword token that was just read.
match input.uncons() {
Ok(value) => {
// Unrolled: one copy of this `if` per declared keyword.
$(
if let Some(ref keyword) = value.inner.as_keyword() {
if keyword.0.as_str() == *$keyword {
if $tmp.is_some() {
// Repeated match -- bail out! Providing good error
// messages is hard; this will do for now.
return ConsumedErr(ParseError::new(input.position(), Error::Unexpected(Info::Token(value))));
}
consumed = true;
$tmp = match $value.parse_lazy(input.clone()) {
ConsumedOk((x, new_input)) => {
input = new_input;
Some(x)
}
EmptyErr(mut err) => {
// Attach the token following the keyword (if there is one) as the unexpected token.
if let Ok(t) = input.uncons() {
err.add_error(Error::Unexpected(Info::Token(t)));
}
// NOTE(review): `consumed` was set to true just above, so the `else` arm here
// appears unreachable.
if consumed {
return ConsumedErr(err)
} else {
return EmptyErr(err)
}
}
ConsumedErr(err) => return ConsumedErr(err),
EmptyOk((x, new_input)) => {
input = new_input;
Some(x)
}
};
// This keyword is handled; go read the next token.
continue
}
}
)+
// No keyword matched! Bail out.
return ConsumedErr(ParseError::new(input.position(), Error::Unexpected(Info::Token(value))));
},
Err(err) => {
// No further token could be read. If at least one keyword matched, return what was
// collected. Otherwise an end-of-input error yields an all-`None` tuple as `EmptyOk`,
// and any other error is returned as `EmptyErr`.
if consumed {
return ConsumedOk((($($tmp),+), input))
} else {
if err == Error::end_of_input() {
return EmptyOk((($($tmp),+), input));
}
return EmptyErr(ParseError::new(input.position(), err))
}
},
}
}
}
}
}
}
// Implement `Parser` for `KeywordMapParser` over tuples of two through seven
// `(keyword, value parser)` pairs. Within each triple, the first identifier names the keyword
// binding, the second doubles as the value parser's generic type parameter and its local
// binding, and the third names the optional output slot.
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et), (Fk, Fv, Ft));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et), (Fk, Fv, Ft), (Gk, Gv, Gt));
#[cfg(test)]
mod tests {
use combine::{eof};
use combine::{
eof,
many,
satisfy,
};
use super::*;
/// Take a string `input` and a string `expected` and ensure that `input` parses to an
/// `edn::Value` keyword map equivalent to the `edn::Value` that `expected` parses to.
macro_rules! assert_keyword_map_eq {
( $input: expr, $expected: expr ) => {{
let input = edn::parse::value($input).expect("to be able to parse input EDN");
let expected = $expected.map(|e| {
edn::parse::value(e).expect("to be able to parse expected EDN").without_spans()
});
let mut par = keyword_map().map(|x| x.without_spans()).skip(eof());
let result = par.parse(input.into_atom_stream()).map(|x| x.0);
assert_eq!(result.ok(), expected);
}}
use macros::{
ResultParser,
};
/// A little test parser.
pub struct Test<'a>(std::marker::PhantomData<&'a ()>);
def_matches_namespaced_keyword!(Test, add, "db", "add");
def_parser!(Test, entid, i64, {
integer()
.map(|x| x)
.or(namespaced_keyword().map(|_| -1))
});
#[test]
#[should_panic(expected = r#"keyword map has repeated key: "x""#)]
fn test_keyword_map_of() {
keyword_map_of!(("x", Test::entid()),
("x", Test::entid()));
}
#[test]
fn test_iter() {
// A vector and a map iterated as a keyword map produce the same elements.
let input = edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN");
assert_eq!(input.child_iter().cloned().map(|x| x.without_spans()).into_iter().collect::<Vec<_>>(),
edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
let input = edn::parse::value("{:x [1 2] :y [3 4]}").expect("to be able to parse input as EDN");
assert_eq!(input.keyword_map_iter().cloned().map(|x| x.without_spans()).into_iter().collect::<Vec<_>>(),
edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
// Parsing a keyword map in map and vector form produces the same elements. The order (:y
// before :x) is a foible of our EDN implementation and could be easily changed.
assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x| x.0.map(|x| x.clone().without_spans()).into_iter().collect::<Vec<_>>()),
"{:x [1] :y [2]}",
edn::parse::value("[:y 2 :x 1]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x| x.0.map(|x| x.clone().without_spans()).into_iter().collect::<Vec<_>>()),
"[:y 2 :x 1]",
edn::parse::value("[:y 2 :x 1]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
}
#[test]
fn test_keyword_map() {
assert_keyword_map_eq!(
"[:foo 1 2 3 :bar 4]",
Some("{:foo [1 2 3] :bar [4]}"));
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:y 2 :x 1]",
(Some(1), Some(2)));
// Trailing keywords aren't allowed.
assert_keyword_map_eq!(
"[:foo]",
None);
assert_keyword_map_eq!(
"[:foo 2 :bar]",
None);
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:x 1 :y 2]",
(Some(1), Some(2)));
// Duplicate keywords aren't allowed.
assert_keyword_map_eq!(
"[:foo 2 :foo 1]",
None);
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:x 1]",
(Some(1), None));
// Starting with anything but a keyword isn't allowed.
assert_keyword_map_eq!(
"[2 :foo 1]",
None);
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))),
("y", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))))),
"[:x [] :y [1 2]]",
(Some(vec![]), Some(vec![1, 2])));
// Consecutive keywords aren't allowed.
assert_keyword_map_eq!(
"[:foo :bar 1]",
None);
// Empty lists return an empty map.
assert_keyword_map_eq!(
"[]",
Some("{}"));
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))),
("y", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))))),
"[]",
(None, None));
}
#[test]
fn test_keyword_map_failures() {
assert_parse_failure_contains!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:x 1 :x 2]",
r#"errors: [Unexpected(Token(ValueAndSpan { inner: Keyword(Keyword("x"))"#);
}
// assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x| x.0.map(|x| x.clone().without_spans()).into_iter().collect::<Vec<_>>()), "{:x [1] :y [2]}", vec![]);
// assert_edn_parses_to!(|| keyword_map().or(vector()).of_exactly((Test::entid(), Test::entid())), "{:x [1] :y [2]}", (-1, 1));
// assert_edn_parses_to!(|| kw_map().of_exactly((Test::entid(), Test::entid())), "[:a 0 :b 0 1]", (1, 1));
// assert_edn_parses_to!(|| keyword_map_of(&[(":kw1", Test::entid()),
// (":kw2", (Test::entid(), Test::entid())),]),
// "{:kw1 0 :kw2 1 :x/y}", ((Some(0), Some((0, 1)))));
// let input = edn::parse::value("[:x/y]").expect("to be able to parse input as EDN");
// let par = vector().of_exactly(Test::entid());
// let stream: Stream = (&input).atom_stream();
// let result = par.skip(eof()).parse(stream).map(|x| x.0);
// assert_eq!(result, Ok(1));
// }
// #[test]
// fn test_keyword_map() {
// assert_keyword_map_eq!(
// "[:foo 1 2 3 :bar 4]",
// Some("{:foo [1 2 3] :bar [4]}"));
// // Trailing keywords aren't allowed.
// assert_keyword_map_eq!(
// "[:foo]",
// None);
// assert_keyword_map_eq!(
// "[:foo 2 :bar]",
// None);
// // Duplicate keywords aren't allowed.
// assert_keyword_map_eq!(
// "[:foo 2 :foo 1]",
// None);
// // Starting with anything but a keyword isn't allowed.
// assert_keyword_map_eq!(
// "[2 :foo 1]",
// None);
// // Consecutive keywords aren't allowed.
// assert_keyword_map_eq!(
// "[:foo :bar 1]",
// None);
// // Empty lists return an empty map.
// assert_keyword_map_eq!(
// "[]",
// Some("{}"));
// }
}

View file

@ -4,7 +4,7 @@ version = "0.0.1"
workspace = ".."
[dependencies]
combine = "2.2.2"
combine = "2.3.2"
error-chain = "0.8.1"
matches = "0.1"

View file

@ -21,6 +21,7 @@ use self::combine::{eof, many, many1, optional, parser, satisfy, satisfy_map, Pa
use self::combine::combinator::{any, choice, or, try};
use self::mentat_parser_utils::{
KeywordMapParser,
ResultParser,
ValueParseError,
};
@ -79,12 +80,12 @@ error_chain! {
display("not a variable: '{}'", value)
}
FindParseError(e: combine::ParseError<ValueStream>) {
FindParseError(e: ValueParseError) {
description(":find parse error")
display(":find parse error")
}
WhereParseError(e: combine::ParseError<ValueStream>) {
WhereParseError(e: ValueParseError) {
description(":where parse error")
display(":where parse error")
}
@ -117,7 +118,7 @@ error_chain! {
}
}
pub struct Query;
pub struct Query<'a>(std::marker::PhantomData<&'a ()>);
def_parser!(Query, variable, Variable, {
satisfy_map(Variable::from_value)
@ -141,7 +142,7 @@ def_parser!(Query, arguments, Vec<FnArg>, {
});
def_parser!(Query, direction, Direction, {
satisfy_map(|v: edn::ValueAndSpan| {
satisfy_map(|v: &edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::PlainSymbol(ref s) => {
let name = s.0.as_str();
@ -162,20 +163,20 @@ def_parser!(Query, order, Order, {
.or(Query::variable().map(|v| Order(Direction::Ascending, v)))
});
pub struct Where;
pub struct Where<'a>(std::marker::PhantomData<&'a ()>);
def_parser!(Where, pattern_value_place, PatternValuePlace, {
satisfy_map(PatternValuePlace::from_value)
});
def_parser!(Query, natural_number, u64, {
any().and_then(|v: edn::ValueAndSpan| {
any().and_then(|v: &edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::Integer(x) if (x > 0) => {
Ok(x as u64)
},
spanned => {
let e = Box::new(Error::from_kind(ErrorKind::InvalidLimit(spanned.into())));
ref spanned => {
let e = Box::new(Error::from_kind(ErrorKind::InvalidLimit(spanned.clone().into())));
Err(combine::primitives::Error::Other(e))
},
}
@ -338,7 +339,7 @@ def_parser!(Where, clauses, Vec<WhereClause>, {
(many1::<Vec<WhereClause>, _>(Where::clause()))
});
pub struct Find;
pub struct Find<'a>(std::marker::PhantomData<&'a ()>);
def_matches_plain_symbol!(Find, period, ".");
@ -347,7 +348,6 @@ def_matches_plain_symbol!(Find, ellipsis, "...");
def_parser!(Find, find_scalar, FindSpec, {
Query::variable()
.skip(Find::period())
.skip(eof())
.map(|var| FindSpec::FindScalar(Element::Variable(var)))
});
@ -392,91 +392,37 @@ def_parser!(Find, spec, FindSpec, {
&mut try(Find::find_rel())])
});
def_matches_keyword!(Find, literal_find, "find");
def_matches_keyword!(Find, literal_in, "in");
def_matches_keyword!(Find, literal_limit, "limit");
def_matches_keyword!(Find, literal_order, "order");
def_matches_keyword!(Find, literal_where, "where");
def_matches_keyword!(Find, literal_with, "with");
/// Express something close to a builder pattern for a `FindQuery`.
enum FindQueryPart {
FindSpec(FindSpec),
In(BTreeSet<Variable>),
Limit(Limit),
Order(Vec<Order>),
WhereClauses(Vec<WhereClause>),
With(BTreeSet<Variable>),
}
def_parser!(Find, vars, BTreeSet<Variable>, {
vector().of_exactly(many(Query::variable()).and_then(|vars: Vec<Variable>| {
let given = vars.len();
let set: BTreeSet<Variable> = vars.into_iter().collect();
if given != set.len() {
// TODO: find out what the variable is!
let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError));
Err(combine::primitives::Error::Other(e))
} else {
Ok(set)
}
}))
many(Query::variable()).and_then(|vars: Vec<Variable>| {
let given = vars.len();
let set: BTreeSet<Variable> = vars.into_iter().collect();
if given != set.len() {
// TODO: find out what the variable is!
let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError));
Err(combine::primitives::Error::Other(e))
} else {
Ok(set)
}
})
});
/// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find
/// queries, then we use `FindQueryPart` to collect parts that have heterogeneous types; and then we
/// construct a `FindQuery` from them.
def_parser!(Find, query, FindQuery, {
let p_find_spec = Find::literal_find()
.with(vector().of_exactly(Find::spec().map(FindQueryPart::FindSpec)));
let find_map = keyword_map_of!(
("find", Find::spec()),
("in", Find::vars()),
("limit", Query::variable().map(Limit::Variable).or(Query::natural_number().map(Limit::Fixed))),
("order", many1(Query::order())),
("where", Where::clauses()),
("with", Find::vars()) // Note: no trailing comma allowed!
);
let p_in_vars = Find::literal_in()
.with(Find::vars().map(FindQueryPart::In));
let p_limit = Find::literal_limit()
.with(vector().of_exactly(
Query::variable().map(|v| Limit::Variable(v))
.or(Query::natural_number().map(|n| Limit::Fixed(n)))))
.map(FindQueryPart::Limit);
let p_order_clauses = Find::literal_order()
.with(vector().of_exactly(many1(Query::order()).map(FindQueryPart::Order)));
let p_where_clauses = Find::literal_where()
.with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses)))
.expected(":where clauses");
let p_with_vars = Find::literal_with()
.with(Find::vars().map(FindQueryPart::With));
(or(map(), keyword_map()))
.of_exactly(many(choice::<[&mut Parser<Input = ValueStream, Output = FindQueryPart>; 6], _>([
// Ordered by likelihood.
&mut try(p_find_spec),
&mut try(p_where_clauses),
&mut try(p_in_vars),
&mut try(p_limit),
&mut try(p_order_clauses),
&mut try(p_with_vars),
])))
.and_then(|parts: Vec<FindQueryPart>| -> std::result::Result<FindQuery, combine::primitives::Error<edn::ValueAndSpan, edn::ValueAndSpan>> {
let mut find_spec = None;
let mut in_vars = None;
let mut limit = Limit::None;
let mut order_clauses = None;
let mut where_clauses = None;
let mut with_vars = None;
for part in parts {
match part {
FindQueryPart::FindSpec(x) => find_spec = Some(x),
FindQueryPart::In(x) => in_vars = Some(x),
FindQueryPart::Limit(x) => limit = x,
FindQueryPart::Order(x) => order_clauses = Some(x),
FindQueryPart::WhereClauses(x) => where_clauses = Some(x),
FindQueryPart::With(x) => with_vars = Some(x),
}
}
(or(keyword_map(), vector()))
.of_exactly(find_map)
.and_then(|(find_spec, in_vars, limit, order_clauses, where_clauses, with_vars) | -> std::result::Result<FindQuery, combine::primitives::Error<&edn::ValueAndSpan, &edn::ValueAndSpan>> {
let limit = limit.unwrap_or(Limit::None);
// Make sure that if we have `:limit ?x`, `?x` appears in `:in`.
let in_vars = in_vars.unwrap_or(BTreeSet::default());
@ -503,9 +449,9 @@ def_parser!(Find, query, FindQuery, {
pub fn parse_find_string(string: &str) -> Result<FindQuery> {
let expr = edn::parse::value(string)?;
Find::query()
.parse(expr.into_atom_stream())
.parse(expr.atom_stream())
.map(|x| x.0)
.map_err(|e| Error::from_kind(ErrorKind::FindParseError(e)))
.map_err(|e| Error::from_kind(ErrorKind::FindParseError(e.into())))
}
#[cfg(test)]
@ -594,8 +540,9 @@ mod test {
edn::Value::Float(v.clone()),
edn::Value::PlainSymbol(tx.clone())));
let input = input.with_spans();
let mut par = Where::pattern();
let result = par.parse(input.with_spans().into_atom_stream());
let result = par.parse(input.atom_stream());
assert!(matches!(result, Err(_)), "Expected a parse error.");
}
@ -635,15 +582,16 @@ mod test {
let f = edn::PlainSymbol::new("?f");
let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()),
edn::Value::PlainSymbol(f.clone()),]);
assert_parses_to!(Find::vars, input,
assert_parses_to!(|| vector().of_exactly(Find::vars()), input,
vec![variable(e.clone()), variable(f.clone())].into_iter().collect());
let g = edn::PlainSymbol::new("?g");
let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(g.clone()),
edn::Value::PlainSymbol(g.clone()),]);
let mut par = Find::vars();
let result = par.parse(input.with_spans().into_atom_stream())
let input = input.with_spans();
let mut par = vector().of_exactly(Find::vars());
let result = par.parse(input.atom_stream())
.map(|x| x.0)
.map_err(|e| if let Some(combine::primitives::Error::Other(x)) = e.errors.into_iter().next() {
// Pattern matching on boxes is rocket science until Rust Nightly features hit
@ -806,23 +754,27 @@ mod test {
let zero = edn::Value::Integer(0);
let pos = edn::Value::Integer(5);
// This is terrible, but destructuring errors is a shitshow.
// This is terrible, but destructuring errors is frustrating.
let input = text.with_spans();
let mut par = Query::natural_number();
let x = par.parse(text.with_spans().into_atom_stream()).err().expect("an error").errors;
let x = par.parse(input.atom_stream()).err().expect("an error").errors;
let result = format!("{:?}", x);
assert_eq!(result, "[Other(Error(InvalidLimit(Text(\"foo\")), State { next_error: None, backtrace: None })), Expected(Borrowed(\"natural_number\"))]");
let input = neg.with_spans();
let mut par = Query::natural_number();
let x = par.parse(neg.with_spans().into_atom_stream()).err().expect("an error").errors;
let x = par.parse(input.atom_stream()).err().expect("an error").errors;
let result = format!("{:?}", x);
assert_eq!(result, "[Other(Error(InvalidLimit(Integer(-10)), State { next_error: None, backtrace: None })), Expected(Borrowed(\"natural_number\"))]");
let input = zero.with_spans();
let mut par = Query::natural_number();
let x = par.parse(zero.with_spans().into_atom_stream()).err().expect("an error").errors;
let x = par.parse(input.atom_stream()).err().expect("an error").errors;
let result = format!("{:?}", x);
assert_eq!(result, "[Other(Error(InvalidLimit(Integer(0)), State { next_error: None, backtrace: None })), Expected(Borrowed(\"natural_number\"))]");
let input = pos.with_spans();
let mut par = Query::natural_number();
assert_eq!(None, par.parse(pos.with_spans().into_atom_stream()).err());
assert_eq!(None, par.parse(input.atom_stream()).err());
}
}

View file

@ -84,14 +84,14 @@ impl Variable {
}
pub trait FromValue<T> {
fn from_value(v: edn::ValueAndSpan) -> Option<T>;
fn from_value(v: &edn::ValueAndSpan) -> Option<T>;
}
/// If the provided EDN value is a PlainSymbol beginning with '?', return
/// it wrapped in a Variable. If not, return None.
/// TODO: intern strings. #398.
impl FromValue<Variable> for Variable {
fn from_value(v: edn::ValueAndSpan) -> Option<Variable> {
fn from_value(v: &edn::ValueAndSpan) -> Option<Variable> {
if let edn::SpannedValue::PlainSymbol(ref s) = v.inner {
Variable::from_symbol(s)
} else {
@ -129,7 +129,7 @@ impl fmt::Debug for Variable {
pub struct PredicateFn(pub PlainSymbol);
impl FromValue<PredicateFn> for PredicateFn {
fn from_value(v: edn::ValueAndSpan) -> Option<PredicateFn> {
fn from_value(v: &edn::ValueAndSpan) -> Option<PredicateFn> {
if let edn::SpannedValue::PlainSymbol(ref s) = v.inner {
PredicateFn::from_symbol(s)
} else {
@ -162,7 +162,7 @@ pub enum SrcVar {
}
impl FromValue<SrcVar> for SrcVar {
fn from_value(v: edn::ValueAndSpan) -> Option<SrcVar> {
fn from_value(v: &edn::ValueAndSpan) -> Option<SrcVar> {
if let edn::SpannedValue::PlainSymbol(ref s) = v.inner {
SrcVar::from_symbol(s)
} else {
@ -215,9 +215,9 @@ pub enum FnArg {
}
impl FromValue<FnArg> for FnArg {
fn from_value(v: edn::ValueAndSpan) -> Option<FnArg> {
fn from_value(v: &edn::ValueAndSpan) -> Option<FnArg> {
// TODO: support SrcVars.
Variable::from_value(v.clone()) // TODO: don't clone!
Variable::from_value(v)
.and_then(|v| Some(FnArg::Variable(v)))
.or_else(|| {
println!("from_value {}", v.inner);
@ -266,7 +266,7 @@ impl PatternNonValuePlace {
}
impl FromValue<PatternNonValuePlace> for PatternNonValuePlace {
fn from_value(v: edn::ValueAndSpan) -> Option<PatternNonValuePlace> {
fn from_value(v: &edn::ValueAndSpan) -> Option<PatternNonValuePlace> {
match v.inner {
edn::SpannedValue::Integer(x) => if x >= 0 {
Some(PatternNonValuePlace::Entid(x))
@ -308,7 +308,7 @@ pub enum PatternValuePlace {
}
impl FromValue<PatternValuePlace> for PatternValuePlace {
fn from_value(v: edn::ValueAndSpan) -> Option<PatternValuePlace> {
fn from_value(v: &edn::ValueAndSpan) -> Option<PatternValuePlace> {
match v.inner {
edn::SpannedValue::Integer(x) =>
Some(PatternValuePlace::EntidOrInteger(x)),

View file

@ -130,7 +130,7 @@ impl Conn {
transaction: &str) -> Result<TxReport> {
let assertion_vector = edn::parse::value(transaction)?;
let entities = mentat_tx_parser::Tx::parse(assertion_vector)?;
let entities = mentat_tx_parser::Tx::parse(&assertion_vector)?;
let tx = sqlite.transaction()?;

View file

@ -4,7 +4,7 @@ version = "0.0.1"
workspace = ".."
[dependencies]
combine = "2.2.2"
combine = "2.3.2"
error-chain = "0.8.1"
[dependencies.edn]

View file

@ -10,8 +10,7 @@
#![allow(dead_code)]
use combine;
use mentat_parser_utils::value_and_span::Stream;
use mentat_parser_utils::ValueParseError;
error_chain! {
types {
@ -19,7 +18,7 @@ error_chain! {
}
errors {
ParseError(parse_error: combine::ParseError<Stream>) {
ParseError(parse_error: ValueParseError) {
description("error parsing edn values")
display("error parsing edn values:\n{}", parse_error)
}

View file

@ -21,11 +21,13 @@ extern crate mentat_tx;
extern crate mentat_parser_utils;
use combine::{
choice,
eof,
many,
parser,
satisfy,
satisfy_map,
try,
Parser,
ParseResult,
};
@ -53,12 +55,12 @@ use mentat_parser_utils::value_and_span::{
pub mod errors;
pub use errors::*;
pub struct Tx;
pub struct Tx<'a>(std::marker::PhantomData<&'a ()>);
def_parser!(Tx, entid, Entid, {
integer()
.map(|x| Entid::Entid(x))
.or(namespaced_keyword().map(|x| Entid::Ident(x)))
.or(namespaced_keyword().map(|x| Entid::Ident(x.clone())))
});
def_matches_plain_symbol!(Tx, literal_lookup_ref, "lookup-ref");
@ -68,7 +70,7 @@ def_parser!(Tx, lookup_ref, LookupRef, {
Tx::literal_lookup_ref()
.with((Tx::entid(),
Tx::atom()))
.map(|(a, v)| LookupRef { a: a, v: v.without_spans() }))
.map(|(a, v)| LookupRef { a: a, v: v.clone().without_spans() }))
});
def_parser!(Tx, entid_or_lookup_ref_or_temp_id, EntidOrLookupRefOrTempId, {
@ -78,11 +80,11 @@ def_parser!(Tx, entid_or_lookup_ref_or_temp_id, EntidOrLookupRefOrTempId, {
});
def_parser!(Tx, temp_id, TempId, {
satisfy_map(|x: edn::ValueAndSpan| x.into_text().map(TempId::External))
satisfy_map(|x: &'a edn::ValueAndSpan| x.as_text().cloned().map(TempId::External))
});
def_parser!(Tx, atom, edn::ValueAndSpan, {
satisfy_map(|x: edn::ValueAndSpan| x.into_atom())
def_parser!(Tx, atom, &'a edn::ValueAndSpan, {
satisfy_map(|x: &'a edn::ValueAndSpan| x.as_atom())
});
def_parser!(Tx, nested_vector, Vec<AtomOrLookupRefOrVectorOrMapNotation>, {
@ -90,10 +92,12 @@ def_parser!(Tx, nested_vector, Vec<AtomOrLookupRefOrVectorOrMapNotation>, {
});
def_parser!(Tx, atom_or_lookup_ref_or_vector, AtomOrLookupRefOrVectorOrMapNotation, {
Tx::lookup_ref().map(AtomOrLookupRefOrVectorOrMapNotation::LookupRef)
.or(Tx::nested_vector().map(AtomOrLookupRefOrVectorOrMapNotation::Vector))
.or(Tx::map_notation().map(AtomOrLookupRefOrVectorOrMapNotation::MapNotation))
.or(Tx::atom().map(AtomOrLookupRefOrVectorOrMapNotation::Atom))
choice::<[&mut Parser<Input = _, Output = AtomOrLookupRefOrVectorOrMapNotation>; 4], _>
([&mut try(Tx::lookup_ref().map(AtomOrLookupRefOrVectorOrMapNotation::LookupRef)),
&mut Tx::nested_vector().map(AtomOrLookupRefOrVectorOrMapNotation::Vector),
&mut Tx::map_notation().map(AtomOrLookupRefOrVectorOrMapNotation::MapNotation),
&mut Tx::atom().map(|x| x.clone()).map(AtomOrLookupRefOrVectorOrMapNotation::Atom)
])
});
def_matches_namespaced_keyword!(Tx, literal_db_add, "db", "add");
@ -133,21 +137,21 @@ def_parser!(Tx, entities, Vec<Entity>, {
vector().of_exactly(many(Tx::entity()))
});
impl Tx {
pub fn parse(input: edn::ValueAndSpan) -> std::result::Result<Vec<Entity>, errors::Error> {
impl<'a> Tx<'a> {
pub fn parse(input: &'a edn::ValueAndSpan) -> std::result::Result<Vec<Entity>, errors::Error> {
Tx::entities()
.skip(eof())
.parse(input.into_atom_stream())
.parse(input.atom_stream())
.map(|x| x.0)
.map_err(|e| Error::from_kind(ErrorKind::ParseError(e)))
.map_err(|e| Error::from_kind(ErrorKind::ParseError(e.into())))
}
fn parse_entid_or_lookup_ref_or_temp_id(input: edn::ValueAndSpan) -> std::result::Result<EntidOrLookupRefOrTempId, errors::Error> {
Tx::entid_or_lookup_ref_or_temp_id()
.skip(eof())
.parse(input.into_atom_stream())
.parse(input.atom_stream())
.map(|x| x.0)
.map_err(|e| Error::from_kind(ErrorKind::ParseError(e)))
.map_err(|e| Error::from_kind(ErrorKind::ParseError(e.into())))
}
}
@ -212,8 +216,11 @@ mod tests {
kw("test", "entid"),
kw("test", "a"),
Value::Text("v".into())]);
let mut parser = Tx::entity();
let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0);
let input = input.with_spans();
let stream = input.atom_stream();
let result = Tx::entity().parse(stream).map(|x| x.0);
assert_eq!(result,
Ok(Entity::AddOrRetract {
op: OpType::Add,
@ -230,8 +237,11 @@ mod tests {
Value::Integer(101),
kw("test", "a"),
Value::Text("v".into())]);
let mut parser = Tx::entity();
let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0);
let input = input.with_spans();
let stream = input.atom_stream();
let result = Tx::entity().parse(stream).map(|x| x.0);
assert_eq!(result,
Ok(Entity::AddOrRetract {
op: OpType::Retract,
@ -249,8 +259,11 @@ mod tests {
Value::Text("v1".into())].into_iter().collect()),
kw("test", "a"),
Value::Text("v".into())]);
let mut parser = Tx::entity();
let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0);
let input = input.with_spans();
let stream = input.atom_stream();
let result = Tx::entity().parse(stream).map(|x| x.0);
assert_eq!(result,
Ok(Entity::AddOrRetract {
op: OpType::Add,
@ -271,8 +284,11 @@ mod tests {
Value::Text("v1".into())].into_iter().collect()),
kw("test", "a"),
Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])]);
let mut parser = Tx::entity();
let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0);
let input = input.with_spans();
let stream = input.atom_stream();
let result = Tx::entity().parse(stream).map(|x| x.0);
assert_eq!(result,
Ok(Entity::AddOrRetract {
op: OpType::Add,
@ -297,8 +313,10 @@ mod tests {
map.insert(kw("db", "ident"), kw("test", "attribute"));
let input = Value::Map(map.clone());
let mut parser = Tx::entity();
let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0);
let input = input.with_spans();
let stream = input.atom_stream();
let result = Tx::entity().parse(stream).map(|x| x.0);
assert_eq!(result,
Ok(Entity::MapNotation(expected)));
}

View file

@ -34,7 +34,7 @@ fn test_float_and_uuid() {
"#;
let edn = parse::value(input).expect("to parse test input");
let result = Tx::parse(edn);
let result = Tx::parse(&edn);
assert_eq!(result.unwrap(),
vec![
Entity::AddOrRetract {
@ -61,7 +61,7 @@ fn test_entities() {
let edn = parse::value(input).expect("to parse test input");
let result = Tx::parse(edn);
let result = Tx::parse(&edn);
assert_eq!(result.unwrap(),
vec![
Entity::AddOrRetract {