From c1409078fa591e84225b3900a2d64e80ca1e9f2a Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 28 Mar 2017 14:10:05 -0700 Subject: [PATCH 01/18] Pre: Expose more in edn. --- edn/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edn/src/lib.rs b/edn/src/lib.rs index 6ff2ff68..4536b6e8 100644 --- a/edn/src/lib.rs +++ b/edn/src/lib.rs @@ -26,5 +26,5 @@ pub mod parse { pub use num::BigInt; pub use ordered_float::OrderedFloat; pub use parse::ParseError; -pub use types::Value; +pub use types::{Span, SpannedValue, Value, ValueAndSpan}; pub use symbols::{Keyword, NamespacedKeyword, PlainSymbol, NamespacedSymbol}; From f807b16db1869a05f25d61a5e8256712d093d188 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 28 Mar 2017 14:10:15 -0700 Subject: [PATCH 02/18] Pre: Make it easier to work with ValueAndSpan. with_spans() is a temporary hack, needed only because I don't care to parse the bootstrap assertions from text right now. --- edn/src/types.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/edn/src/types.rs b/edn/src/types.rs index 1e2267c6..6c49b177 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -77,6 +77,37 @@ pub struct ValueAndSpan { pub span: Span, } +impl ValueAndSpan { + pub fn new(spanned_value: SpannedValue, span: I) -> ValueAndSpan where I: Into> { + ValueAndSpan { + inner: spanned_value, + span: span.into().unwrap_or(Span(0, 0)), // Think about this. + } + } + + pub fn into_atom(self) -> Option { + if self.inner.is_atom() { + Some(self) + } else { + None + } + } + + pub fn into_text(self) -> Option { + self.inner.into_text() + } +} + +impl Value { + pub fn with_spans(self) -> ValueAndSpan { + let s = self.to_pretty(120).unwrap(); + use ::parse; + let with_spans = parse::value(&s).unwrap(); + assert_eq!(self, with_spans.clone().without_spans()); + with_spans + } +} + impl From for Value { fn from(src: SpannedValue) -> Value { match src { From 8754cf224b8803be109d57c3e876590df64749e1 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 28 Mar 2017 14:27:12 -0700 Subject: [PATCH 03/18] Part 1a: Add `value_and_span` for parsing nested `edn::ValueAndSpan` instances. I wasn't able to abstract over `edn::Value` and `edn::ValueAndSpan`; there are multiple obstacles. I chose to roll with `edn::ValueAndSpan` since it exposes the additional span information that we will want to form good error messages in the future. --- edn/src/types.rs | 5 +- parser-utils/src/lib.rs | 32 +++- parser-utils/src/value_and_span.rs | 282 +++++++++++++++++++++++++++++ 3 files changed, 313 insertions(+), 6 deletions(-) create mode 100644 parser-utils/src/value_and_span.rs diff --git a/edn/src/types.rs b/edn/src/types.rs index 6c49b177..efd2eb4c 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -81,7 +81,7 @@ impl ValueAndSpan { pub fn new(spanned_value: SpannedValue, span: I) -> ValueAndSpan where I: Into> { ValueAndSpan { inner: spanned_value, - span: span.into().unwrap_or(Span(0, 0)), // Think about this. + span: span.into().unwrap_or(Span(0, 0)), // TODO: consider if this has implications. } } @@ -99,6 +99,9 @@ impl ValueAndSpan { } impl Value { + /// For debug use only! + /// + /// But right now, it's used in the bootstrapper. We'll fix that soon. pub fn with_spans(self) -> ValueAndSpan { let s = self.to_pretty(120).unwrap(); use ::parse; diff --git a/parser-utils/src/lib.rs b/parser-utils/src/lib.rs index 1f2ac717..69b9163b 100644 --- a/parser-utils/src/lib.rs +++ b/parser-utils/src/lib.rs @@ -11,8 +11,16 @@ extern crate combine; extern crate edn; -use combine::ParseResult; -use combine::combinator::{Expected, FnParser}; +pub mod value_and_span; + +use combine::{ + ParseResult, + Stream, +}; +use combine::combinator::{ + Expected, + FnParser, +}; /// A type definition for a function parser that either parses an `O` from an input stream of type /// `I`, or fails with an "expected" failure. @@ -25,10 +33,10 @@ pub type ResultParser = Expected ParseResult>>; /// parser function against input and expecting a certain result. #[macro_export] macro_rules! assert_parses_to { - ( $parser: path, $input: expr, $expected: expr ) => {{ + ( $parser: expr, $input: expr, $expected: expr ) => {{ let mut par = $parser(); - let result = par.parse(&$input[..]); - assert_eq!(result, Ok(($expected, &[][..]))); + let result = par.parse($input.with_spans().into_atom_stream()).map(|x| x.0); // TODO: check remainder of stream. + assert_eq!(result, Ok($expected)); }} } @@ -82,6 +90,20 @@ macro_rules! def_parser_fn { } } +#[macro_export] +macro_rules! def_parser { + ( $parser: ident, $name: ident, $result_type: ty, $body: block ) => { + impl $parser { + fn $name() -> ResultParser<$result_type, $crate::value_and_span::Stream> { + fn inner(input: $crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream> { + $body.parse_lazy(input).into() + } + parser(inner as fn($crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream>).expected(stringify!($name)) + } + } + } +} + /// `def_value_parser_fn` is a short-cut to `def_parser_fn` with the input type /// being `edn::Value`. #[macro_export] diff --git a/parser-utils/src/value_and_span.rs b/parser-utils/src/value_and_span.rs new file mode 100644 index 00000000..9623d2f1 --- /dev/null +++ b/parser-utils/src/value_and_span.rs @@ -0,0 +1,282 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std; +use std::fmt::{Display, Formatter}; +use std::cmp::Ordering; + +use combine::{ + ConsumedResult, + ParseError, + Parser, + StreamOnce, + eof, + satisfy, + satisfy_map, +}; +use combine::primitives; +use combine::primitives::{ + FastResult, +}; +use combine::combinator::{ + Eof, + Skip, +}; + +use edn; + +/// A wrapper to let us order `edn::Span` in whatever way is appropriate for parsing with `combine`. +#[derive(Clone, Debug)] +pub struct SpanPosition(edn::Span); + +impl Display for SpanPosition { + fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { + write!(f, "{:?}", self.0) + } +} + +impl PartialEq for SpanPosition { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for SpanPosition { } + +impl PartialOrd for SpanPosition { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for SpanPosition { + fn cmp(&self, other: &Self) -> Ordering { + (self.0).0.cmp(&(other.0).0) + } +} + +/// An iterator specifically for iterating `edn::ValueAndSpan` instances in various ways. +/// +/// Enumerating each iteration type allows us to have a single `combine::Stream` implementation +/// yielding `ValueAndSpan` items, which allows us to yield uniform `combine::ParseError` types from +/// disparate parsers. +#[derive(Clone)] +pub enum IntoIter { + Empty(std::iter::Empty), + Atom(std::iter::Once), + Vector(std::vec::IntoIter), + List(std::collections::linked_list::IntoIter), + /// Iterates via a single `flat_map` [k1, v1, k2, v2, ...]. + Map(std::vec::IntoIter), + // TODO: Support Set and Map more naturally. This is significantly more work because the + // existing BTreeSet and BTreeMap iterators do not implement Clone, and implementing Clone for + // them is involved. Since we don't really need to parse sets and maps at this time, this will + // do for now. +} + +impl Iterator for IntoIter { + type Item = edn::ValueAndSpan; + + fn next(&mut self) -> Option { + match *self { + IntoIter::Empty(ref mut i) => i.next(), + IntoIter::Atom(ref mut i) => i.next(), + IntoIter::Vector(ref mut i) => i.next(), + IntoIter::List(ref mut i) => i.next(), + IntoIter::Map(ref mut i) => i.next(), + } + } +} + +/// A single `combine::Stream` implementation iterating `edn::ValueAndSpan` instances. Equivalent +/// to `combine::IteratorStream` as produced by `combine::from_iter`, but specialized to +/// `edn::ValueAndSpan`. +#[derive(Clone)] +pub struct Stream(IntoIter, SpanPosition); + +/// Things specific to parsing with `combine` and our `Stream` that need a trait to live outside of +/// the `edn` crate. +pub trait Item: Clone + PartialEq + Sized { + /// Position could be specialized to `SpanPosition`. + type Position: Clone + Ord + std::fmt::Display; + + /// A slight generalization of `combine::Positioner` that allows to set the position based on + /// the `edn::ValueAndSpan` being iterated. + fn start(&self) -> Self::Position; + fn update_position(&self, &mut Self::Position); + + fn into_child_stream_iter(self) -> IntoIter; + fn into_child_stream(self) -> Stream; + fn into_atom_stream_iter(self) -> IntoIter; + fn into_atom_stream(self) -> Stream; +} + +impl Item for edn::ValueAndSpan { + type Position = SpanPosition; + + fn start(&self) -> Self::Position { + SpanPosition(self.span.clone()) + } + + fn update_position(&self, position: &mut Self::Position) { + *position = SpanPosition(self.span.clone()) + } + + fn into_child_stream_iter(self) -> IntoIter { + match self.inner { + edn::SpannedValue::Vector(values) => IntoIter::Vector(values.into_iter()), + edn::SpannedValue::List(values) => IntoIter::List(values.into_iter()), + // Parsing pairs with `combine` is tricky; parsing sequences is easy. + edn::SpannedValue::Map(map) => IntoIter::Map(map.into_iter().flat_map(|(a, v)| std::iter::once(a).chain(std::iter::once(v))).collect::>().into_iter()), + _ => IntoIter::Empty(std::iter::empty()), + } + } + + fn into_child_stream(self) -> Stream { + let span = self.span.clone(); + Stream(self.into_child_stream_iter(), SpanPosition(span)) + } + + fn into_atom_stream_iter(self) -> IntoIter { + IntoIter::Atom(std::iter::once(self)) + } + + fn into_atom_stream(self) -> Stream { + let span = self.span.clone(); + Stream(self.into_atom_stream_iter(), SpanPosition(span)) + } +} + +/// `Of` and `of` allow us to express nested parsers naturally. +/// +/// For example, `vector().of(many(list()))` parses a vector-of-lists, like [(1 2) (:a :b) ("test") ()]. +/// +/// The "outer" parser `P` and the "nested" parser `N` must be compatible: `P` must produce an +/// output `edn::ValueAndSpan` which can itself be turned into a stream of child elements; and `N` +/// must accept the resulting input `Stream`. This compatibility allows us to lift errors from the +/// nested parser to the outer parser, which is part of what has made parsing `&'a [edn::Value]` +/// difficult. +#[derive(Clone)] +pub struct Of(P, N); + +impl Parser for Of + where P: Parser, + N: Parser, +{ + type Input = P::Input; + type Output = O; + #[inline] + fn parse_lazy(&mut self, input: Self::Input) -> ConsumedResult { + use self::FastResult::*; + + match self.0.parse_lazy(input) { + ConsumedOk((outer_value, outer_input)) => { + match self.1.parse_lazy(outer_value.into_child_stream()) { + ConsumedOk((inner_value, _)) | EmptyOk((inner_value, _)) => ConsumedOk((inner_value, outer_input)), + // TODO: Improve the error output to reference the nested value (or span) in + // some way. This seems surprisingly difficult to do, so we just surface the + // inner error message right now. See also the comment below. + EmptyErr(e) | ConsumedErr(e) => ConsumedErr(e), + } + }, + EmptyOk((outer_value, outer_input)) => { + match self.1.parse_lazy(outer_value.into_child_stream()) { + ConsumedOk((inner_value, _)) | EmptyOk((inner_value, _)) => EmptyOk((inner_value, outer_input)), + // TODO: Improve the error output. See the comment above. + EmptyErr(e) | ConsumedErr(e) => EmptyErr(e), + } + }, + ConsumedErr(e) => ConsumedErr(e), + EmptyErr(e) => EmptyErr(e), + } + } + + fn add_error(&mut self, errors: &mut ParseError) { + self.0.add_error(errors); + } +} + +#[inline(always)] +pub fn of(p: P, n: N) -> Of>> + where P: Parser, + N: Parser, +{ + Of(p, n.skip(eof())) +} + +/// We need a trait to define `Parser.of` and have it live outside of the `combine` crate. +pub trait OfParsing: Parser + Sized { + fn of(self, n: N) -> Of>> + where Self: Sized, + N: Parser; +} + +impl

OfParsing for P + where P: Parser +{ + fn of(self, n: N) -> Of>> + where N: Parser + { + of(self, n) + } +} + +/// Equivalent to `combine::IteratorStream`. +impl StreamOnce for Stream +{ + type Item = edn::ValueAndSpan; + type Range = edn::ValueAndSpan; + type Position = SpanPosition; + + #[inline] + fn uncons(&mut self) -> std::result::Result> { + match self.0.next() { + Some(x) => { + x.update_position(&mut self.1); + Ok(x) + }, + None => Err(primitives::Error::end_of_input()), + } + } + + #[inline(always)] + fn position(&self) -> Self::Position { + self.1.clone() + } +} + +/// Shorthands, just enough to convert the `mentat_db` crate for now. Written using `Box` for now: +/// it's simple and we can address allocation issues if and when they surface. +pub fn vector() -> Box> { + satisfy(|v: edn::ValueAndSpan| v.inner.is_vector()).boxed() +} + +pub fn list() -> Box> { + satisfy(|v: edn::ValueAndSpan| v.inner.is_list()).boxed() +} + +pub fn map() -> Box> { + satisfy(|v: edn::ValueAndSpan| v.inner.is_map()).boxed() +} + +pub fn integer() -> Box> { + satisfy_map(|v: edn::ValueAndSpan| v.inner.as_integer()).boxed() +} + +pub fn namespaced_keyword() -> Box> { + satisfy_map(|v: edn::ValueAndSpan| v.inner.as_namespaced_keyword().cloned()).boxed() +} + +/// Like `combine::token()`, but compare an `edn::Value` to an `edn::ValueAndSpan`. +pub fn value(value: edn::Value) -> Box> { + // TODO: make this comparison faster. Right now, we drop all the spans; if we walked the value + // trees together, we could avoid creating garbage. + satisfy(move |v: edn::ValueAndSpan| value == v.inner.into()).boxed() +} From dcc05c643c19a1ecb3ee3c601c8d1953a6b36f8c Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 30 Mar 2017 15:02:31 -0700 Subject: [PATCH 04/18] Part 1b: Add keyword_map() parsing an `edn::Value::Vector` into an `edn::Value::map`. --- parser-utils/src/value_and_span.rs | 117 ++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/parser-utils/src/value_and_span.rs b/parser-utils/src/value_and_span.rs index 9623d2f1..e0715f5d 100644 --- a/parser-utils/src/value_and_span.rs +++ b/parser-utils/src/value_and_span.rs @@ -16,17 +16,24 @@ use combine::{ ConsumedResult, ParseError, Parser, + ParseResult, StreamOnce, eof, + many, + many1, + parser, satisfy, satisfy_map, }; -use combine::primitives; +use combine::primitives; // To not shadow Error use combine::primitives::{ + Consumed, FastResult, }; use combine::combinator::{ Eof, + Expected, + FnParser, Skip, }; @@ -266,6 +273,10 @@ pub fn map() -> Box> { satisfy(|v: edn::ValueAndSpan| v.inner.is_map()).boxed() } +pub fn seq() -> Box> { + satisfy(|v: edn::ValueAndSpan| v.inner.is_list() || v.inner.is_vector()).boxed() +} + pub fn integer() -> Box> { satisfy_map(|v: edn::ValueAndSpan| v.inner.as_integer()).boxed() } @@ -280,3 +291,107 @@ pub fn value(value: edn::Value) -> Box ParseResult +{ + // One run is a keyword followed by at one or more non-keywords. + let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()), + many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword())) + .map(|vs: Vec| { + // TODO: extract "spanning". + let beg = vs.first().unwrap().span.0; + let end = vs.last().unwrap().span.1; + edn::ValueAndSpan { + inner: edn::SpannedValue::Vector(vs), + span: edn::Span(beg, end), + } + })); + + let mut runs = vector().of(many::, _>(run)); + + let (data, input) = try!(runs.parse_lazy(input).into()); + + let mut m: std::collections::BTreeMap = std::collections::BTreeMap::default(); + for (k, vs) in data { + if m.insert(k, vs).is_some() { + // TODO: improve this message. + return Err(Consumed::Empty(ParseError::from_errors(input.into_inner().position(), Vec::new()))) + } + } + + let map = edn::ValueAndSpan { + inner: edn::SpannedValue::Map(m), + span: edn::Span(0, 0), // TODO: fix this. + }; + + Ok((map, input)) +} + +/// Turn a vector of keywords and non-keyword values into a map. As an example, turn +/// ```edn +/// [:keyword1 value1 value2 ... :keyword2 value3 value4 ...] +/// ``` +/// into +/// ```edn +/// {:keyword1 [value1 value2 ...] :keyword2 [value3 value4 ...]} +/// ```. +pub fn keyword_map() -> Expected ParseResult>> +{ + // The `as` work arounds https://github.com/rust-lang/rust/issues/20178. + parser(keyword_map_ as fn(Stream) -> ParseResult).expected("keyword map") +} + +#[cfg(test)] +mod tests { + use combine::{eof}; + use super::*; + + /// Take a string `input` and a string `expected` and ensure that `input` parses to an + /// `edn::Value` keyword map equivalent to the `edn::Value` that `expected` parses to. + macro_rules! assert_keyword_map_eq { + ( $input: expr, $expected: expr ) => {{ + let input = edn::parse::value($input).expect("to be able to parse input EDN"); + let expected = $expected.map(|e| { + edn::parse::value(e).expect("to be able to parse expected EDN").without_spans() + }); + let mut par = keyword_map().map(|x| x.without_spans()).skip(eof()); + let result = par.parse(input.into_atom_stream()).map(|x| x.0); + assert_eq!(result.ok(), expected); + }} + } + + #[test] + fn test_keyword_map() { + assert_keyword_map_eq!( + "[:foo 1 2 3 :bar 4]", + Some("{:foo [1 2 3] :bar [4]}")); + + // Trailing keywords aren't allowed. + assert_keyword_map_eq!( + "[:foo]", + None); + assert_keyword_map_eq!( + "[:foo 2 :bar]", + None); + + // Duplicate keywords aren't allowed. + assert_keyword_map_eq!( + "[:foo 2 :foo 1]", + None); + + // Starting with anything but a keyword isn't allowed. + assert_keyword_map_eq!( + "[2 :foo 1]", + None); + + // Consecutive keywords aren't allowed. + assert_keyword_map_eq!( + "[:foo :bar 1]", + None); + + // Empty lists return an empty map. + assert_keyword_map_eq!( + "[]", + Some("{}")); + } +} From f4e2a0471ffe53228b3bfee2bbdaf45c1322e960 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 30 Mar 2017 15:03:40 -0700 Subject: [PATCH 05/18] Part 1c: Add `Log`/`.log(...)` for logging parser progress. This is a terrible hack, but it sure helps to debug complicated nested parsers. I don't even know what a principled approach would look like; since our parser combinators are so frequently expressed in code, it's hard to imagine a data-driven interpreter that can help debug things. --- parser-utils/src/lib.rs | 10 +++-- parser-utils/src/log.rs | 87 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 parser-utils/src/log.rs diff --git a/parser-utils/src/lib.rs b/parser-utils/src/lib.rs index 69b9163b..5e8cf923 100644 --- a/parser-utils/src/lib.rs +++ b/parser-utils/src/lib.rs @@ -11,17 +11,21 @@ extern crate combine; extern crate edn; -pub mod value_and_span; - use combine::{ ParseResult, - Stream, }; use combine::combinator::{ Expected, FnParser, }; +pub mod log; +pub mod value_and_span; + +pub use log::{ + LogParsing, +}; + /// A type definition for a function parser that either parses an `O` from an input stream of type /// `I`, or fails with an "expected" failure. /// See for more diff --git a/parser-utils/src/log.rs b/parser-utils/src/log.rs new file mode 100644 index 00000000..9d29cbe4 --- /dev/null +++ b/parser-utils/src/log.rs @@ -0,0 +1,87 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::io::Write; + +use combine::{ + ParseError, + Parser, + ParseResult, + Stream, +}; + +/// println!, but to stderr. +/// +/// Doesn't pollute stdout, which is useful when running tests under Emacs, which parses the output +/// of the test suite to format errors and can get confused when user output is interleaved into the +/// stdout stream. +/// +/// Cribbed from http://stackoverflow.com/a/27590832. +macro_rules! println_stderr( + ($($arg:tt)*) => { { + let r = writeln!(&mut ::std::io::stderr(), $($arg)*); + r.expect("failed printing to stderr"); + } } +); + +#[derive(Clone)] +pub struct Log(P, T) + where P: Parser, + T: ::std::fmt::Debug; + +impl Parser for Log + where I: Stream, + I::Item: ::std::fmt::Debug, + P: Parser, + P::Output: ::std::fmt::Debug, + T: ::std::fmt::Debug, +{ + type Input = I; + type Output = P::Output; + + fn parse_stream(&mut self, input: I) -> ParseResult { + let head = input.clone().uncons(); + let result = self.0.parse_stream(input.clone()); + match result { + Ok((ref value, _)) => println_stderr!("{:?}: [{:?} ...] => Ok({:?})", self.1, head.ok(), value), + Err(_) => println_stderr!("{:?}: [{:?} ...] => Err(_)", self.1, head.ok()), + } + result + } + + fn add_error(&mut self, errors: &mut ParseError) { + self.0.add_error(errors); + } +} + +#[inline(always)] +pub fn log(p: P, msg: T) -> Log + where P: Parser, + T: ::std::fmt::Debug, +{ + Log(p, msg) +} + +/// We need a trait to define `Parser.log` and have it live outside of the `combine` crate. +pub trait LogParsing: Parser + Sized { + fn log(self, msg: T) -> Log + where Self: Sized, + T: ::std::fmt::Debug; +} + +impl

LogParsing for P + where P: Parser, +{ + fn log(self, msg: T) -> Log + where T: ::std::fmt::Debug, + { + log(self, msg) + } +} From e947a32c59bd9c3df055edf6f3334b653c679e11 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 28 Mar 2017 14:38:31 -0700 Subject: [PATCH 06/18] Part 2: Use `value_and_span` apparatus in tx-parser/. I break an abstraction boundary by returning a value column `edn::ValueAndSpan` rather than just an `edn::Value`. That is, the transaction processor shouldn't care where the `edn::Value` it is processing arose -- even we care to track that information we should bake it into the `Entity` type. We do this because we need to dynamically parse the value column to support nested maps, and parsing requires a full `edn::ValueAndSpan`. Alternately, we could cheat and fake the spans when parsing nested maps, but that's potentially expensive. --- db/src/bootstrap.rs | 2 +- db/src/db.rs | 4 +- db/src/tx.rs | 6 +- tx-parser/src/errors.rs | 7 +- tx-parser/src/lib.rs | 438 +++++++++++++++++--------------------- tx-parser/tests/parser.rs | 12 +- tx/src/entities.rs | 5 +- 7 files changed, 214 insertions(+), 260 deletions(-) diff --git a/db/src/bootstrap.rs b/db/src/bootstrap.rs index 7e676dc3..d844d440 100644 --- a/db/src/bootstrap.rs +++ b/db/src/bootstrap.rs @@ -273,6 +273,6 @@ pub fn bootstrap_entities() -> Vec { // Failure here is a coding error (since the inputs are fixed), not a runtime error. // TODO: represent these bootstrap data errors rather than just panicing. - let bootstrap_entities: Vec = mentat_tx_parser::Tx::parse(&[bootstrap_assertions][..]).unwrap(); + let bootstrap_entities: Vec = mentat_tx_parser::Tx::parse(bootstrap_assertions.with_spans()).unwrap(); return bootstrap_entities; } diff --git a/db/src/db.rs b/db/src/db.rs index d39502f6..5c1d6435 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -1125,8 +1125,8 @@ mod tests { fn transact(&mut self, transaction: I) -> Result where I: Borrow { // Failure to parse the transaction is a coding error, so we unwrap. - let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str()).without_spans(); - let entities: Vec<_> = mentat_tx_parser::Tx::parse(&[assertions.clone()][..]).expect(format!("to be able to parse {} into entities", assertions).as_str()); + let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str()); + let entities: Vec<_> = mentat_tx_parser::Tx::parse(assertions.clone()).expect(format!("to be able to parse {} into entities", assertions).as_str()); let details = { // The block scopes the borrow of self.sqlite. diff --git a/db/src/tx.rs b/db/src/tx.rs index 907e13ff..e7464a34 100644 --- a/db/src/tx.rs +++ b/db/src/tx.rs @@ -254,13 +254,13 @@ impl<'conn, 'a> Tx<'conn, 'a> { let v = match v { entmod::AtomOrLookupRefOrVectorOrMapNotation::Atom(v) => { - if attribute.value_type == ValueType::Ref && v.is_text() { - Either::Right(LookupRefOrTempId::TempId(temp_ids.intern(v.as_text().cloned().map(TempId::External).unwrap()))) + if attribute.value_type == ValueType::Ref && v.inner.is_text() { + Either::Right(LookupRefOrTempId::TempId(temp_ids.intern(v.inner.as_text().cloned().map(TempId::External).unwrap()))) } else { // Here is where we do schema-aware typechecking: we either assert that // the given value is in the attribute's value set, or (in limited // cases) coerce the value into the attribute's value set. - let typed_value: TypedValue = self.schema.to_typed_value(&v, &attribute)?; + let typed_value: TypedValue = self.schema.to_typed_value(&v.without_spans(), &attribute)?; Either::Left(typed_value) } }, diff --git a/tx-parser/src/errors.rs b/tx-parser/src/errors.rs index 3ccf8595..c5f9b1fa 100644 --- a/tx-parser/src/errors.rs +++ b/tx-parser/src/errors.rs @@ -10,7 +10,8 @@ #![allow(dead_code)] -use mentat_parser_utils::ValueParseError; +use combine; +use mentat_parser_utils::value_and_span::Stream; error_chain! { types { @@ -18,9 +19,9 @@ error_chain! { } errors { - ParseError(value_parse_error: ValueParseError) { + ParseError(parse_error: combine::ParseError) { description("error parsing edn values") - display("error parsing edn values:\n{}", value_parse_error) + display("error parsing edn values:\n{}", parse_error) } DbIdError { diff --git a/tx-parser/src/lib.rs b/tx-parser/src/lib.rs index 9649c14b..35f148e7 100644 --- a/tx-parser/src/lib.rs +++ b/tx-parser/src/lib.rs @@ -10,8 +10,6 @@ #![allow(dead_code)] -use std::iter::once; - extern crate combine; #[macro_use] extern crate error_chain; @@ -22,13 +20,18 @@ extern crate mentat_tx; #[macro_use] extern crate mentat_parser_utils; -use combine::{eof, many, parser, satisfy_map, token, Parser, ParseResult, Stream}; -use combine::combinator::{Expected, FnParser}; +use combine::{ + eof, + many, + parser, + satisfy_map, + Parser, + ParseResult, +}; use edn::symbols::{ NamespacedKeyword, PlainSymbol, }; -use edn::types::Value; use mentat_tx::entities::{ AtomOrLookupRefOrVectorOrMapNotation, Entid, @@ -39,165 +42,114 @@ use mentat_tx::entities::{ OpType, TempId, }; -use mentat_parser_utils::{ResultParser, ValueParseError}; +use mentat_parser_utils::{ResultParser}; +use mentat_parser_utils::value_and_span::{ + Item, + OfParsing, + integer, + list, + map, + namespaced_keyword, + value, + vector, +}; pub mod errors; pub use errors::*; -pub struct Tx(::std::marker::PhantomData I>); +pub struct Tx; -type TxParser = Expected ParseResult>>; - -fn fn_parser(f: fn(I) -> ParseResult, err: &'static str) -> TxParser - where I: Stream -{ - parser(f).expected(err) -} - -def_value_satisfy_parser_fn!(Tx, integer, i64, Value::as_integer); - -fn value_to_namespaced_keyword(val: &Value) -> Option { - val.as_namespaced_keyword().cloned() -} -def_value_satisfy_parser_fn!(Tx, keyword, NamespacedKeyword, value_to_namespaced_keyword); - -def_parser_fn!(Tx, entid, Value, Entid, input, { - Tx::::integer() +def_parser!(Tx, entid, Entid, { + integer() .map(|x| Entid::Entid(x)) - .or(Tx::::keyword().map(|x| Entid::Ident(x))) - .parse_lazy(input) - .into() + .or(namespaced_keyword().map(|x| Entid::Ident(x))) }); -fn value_to_lookup_ref(val: &Value) -> Option { - val.as_list().and_then(|list| { - let vs: Vec = list.into_iter().cloned().collect(); - let mut p = token(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))) - .with((Tx::<&[Value]>::entid(), - Tx::<&[Value]>::atom())) +def_parser!(Tx, lookup_ref, LookupRef, { + list() + .of(value(edn::Value::PlainSymbol(PlainSymbol::new("lookup-ref"))) + .with((Tx::entid(), + Tx::atom())) + .map(|(a, v)| LookupRef { a: a, v: v.without_spans() })) +}); + +def_parser!(Tx, entid_or_lookup_ref_or_temp_id, EntidOrLookupRefOrTempId, { + Tx::entid().map(EntidOrLookupRefOrTempId::Entid) + .or(Tx::lookup_ref().map(EntidOrLookupRefOrTempId::LookupRef)) + .or(Tx::temp_id().map(EntidOrLookupRefOrTempId::TempId)) +}); + +def_parser!(Tx, temp_id, TempId, { + satisfy_map(|x: edn::ValueAndSpan| x.into_text().map(TempId::External)) +}); + +def_parser!(Tx, atom, edn::ValueAndSpan, { + satisfy_map(|x: edn::ValueAndSpan| x.into_atom().map(|v| v)) +}); + +def_parser!(Tx, nested_vector, Vec, { + vector().of(many(Tx::atom_or_lookup_ref_or_vector())) +}); + +def_parser!(Tx, atom_or_lookup_ref_or_vector, AtomOrLookupRefOrVectorOrMapNotation, { + Tx::lookup_ref().map(AtomOrLookupRefOrVectorOrMapNotation::LookupRef) + .or(Tx::nested_vector().map(AtomOrLookupRefOrVectorOrMapNotation::Vector)) + .or(Tx::map_notation().map(AtomOrLookupRefOrVectorOrMapNotation::MapNotation)) + .or(Tx::atom().map(AtomOrLookupRefOrVectorOrMapNotation::Atom)) +}); + +def_parser!(Tx, add_or_retract, Entity, { + let add = value(edn::Value::NamespacedKeyword(NamespacedKeyword::new("db", "add"))) + .map(|_| OpType::Add); + let retract = value(edn::Value::NamespacedKeyword(NamespacedKeyword::new("db", "retract"))) + .map(|_| OpType::Retract); + let p = (add.or(retract), + Tx::entid_or_lookup_ref_or_temp_id(), + Tx::entid(), + Tx::atom_or_lookup_ref_or_vector()) + .map(|(op, e, a, v)| { + Entity::AddOrRetract { + op: op, + e: e, + a: a, + v: v, + } + }); + + vector().of(p) +}); + +def_parser!(Tx, map_notation, MapNotation, { + map() + .of(many((Tx::entid(), Tx::atom_or_lookup_ref_or_vector()))) + .map(|avs: Vec<(Entid, AtomOrLookupRefOrVectorOrMapNotation)>| -> MapNotation { + avs.into_iter().collect() + }) +}); + +def_parser!(Tx, entity, Entity, { + Tx::add_or_retract() + .or(Tx::map_notation().map(Entity::MapNotation)) +}); + +def_parser!(Tx, entities, Vec, { + vector().of(many(Tx::entity())) +}); + +impl Tx { + pub fn parse(input: edn::ValueAndSpan) -> std::result::Result, errors::Error> { + Tx::entities() .skip(eof()) - .map(|(a, v)| LookupRef { a: a, v: v }); - let r: ParseResult = p.parse_lazy(&vs[..]).into(); - r.ok().map(|x| x.0) - }) -} -def_value_satisfy_parser_fn!(Tx, lookup_ref, LookupRef, value_to_lookup_ref); - -def_parser_fn!(Tx, entid_or_lookup_ref_or_temp_id, Value, EntidOrLookupRefOrTempId, input, { - Tx::::entid().map(EntidOrLookupRefOrTempId::Entid) - .or(Tx::::lookup_ref().map(EntidOrLookupRefOrTempId::LookupRef)) - .or(Tx::::temp_id().map(EntidOrLookupRefOrTempId::TempId)) - .parse_lazy(input) - .into() -}); - -def_parser_fn!(Tx, temp_id, Value, TempId, input, { - satisfy_map(|x: Value| x.into_text().map(TempId::External)) - .parse_stream(input) -}); - -def_parser_fn!(Tx, atom, Value, Value, input, { - satisfy_map(|x: Value| x.into_atom()) - .parse_stream(input) -}); - -fn value_to_nested_vector(val: &Value) -> Option> { - val.as_vector().and_then(|vs| { - let mut p = many(Tx::<&[Value]>::atom_or_lookup_ref_or_vector()).skip(eof()); - let r: ParseResult, _> = p.parse_lazy(&vs[..]).into(); - r.map(|x| x.0).ok() - }) -} -def_value_satisfy_parser_fn!(Tx, nested_vector, Vec, value_to_nested_vector); - -def_parser_fn!(Tx, atom_or_lookup_ref_or_vector, Value, AtomOrLookupRefOrVectorOrMapNotation, input, { - Tx::::lookup_ref().map(AtomOrLookupRefOrVectorOrMapNotation::LookupRef) - .or(Tx::::nested_vector().map(AtomOrLookupRefOrVectorOrMapNotation::Vector)) - .or(Tx::::map_notation().map(AtomOrLookupRefOrVectorOrMapNotation::MapNotation)) - .or(Tx::::atom().map(AtomOrLookupRefOrVectorOrMapNotation::Atom)) - .parse_lazy(input) - .into() -}); - -fn value_to_add_or_retract(val: &Value) -> Option { - val.as_vector().and_then(|vs| { - let add = token(Value::NamespacedKeyword(NamespacedKeyword::new("db", "add"))) - .map(|_| OpType::Add); - let retract = token(Value::NamespacedKeyword(NamespacedKeyword::new("db", "retract"))) - .map(|_| OpType::Retract); - let mut p = (add.or(retract), - Tx::<&[Value]>::entid_or_lookup_ref_or_temp_id(), - Tx::<&[Value]>::entid(), - Tx::<&[Value]>::atom_or_lookup_ref_or_vector()) - .skip(eof()) - .map(|(op, e, a, v)| { - Entity::AddOrRetract { - op: op, - e: e, - a: a, - v: v, - } - }); - let r: ParseResult = p.parse_lazy(&vs[..]).into(); - r.map(|x| x.0).ok() - }) -} -def_value_satisfy_parser_fn!(Tx, add_or_retract, Entity, value_to_add_or_retract); - -fn value_to_map_notation(val: &Value) -> Option { - val.as_map().cloned().and_then(|map| { - // Parsing pairs is tricky; parsing sequences is easy. - let avseq: Vec = map.into_iter().flat_map(|(a, v)| once(a).chain(once(v))).collect(); - - let av = (Tx::<&[Value]>::entid(), - Tx::<&[Value]>::atom_or_lookup_ref_or_vector()) - .map(|(a, v)| -> (Entid, AtomOrLookupRefOrVectorOrMapNotation) { (a, v) }); - let mut p = many(av) - .skip(eof()) - .map(|avs: Vec<(Entid, AtomOrLookupRefOrVectorOrMapNotation)>| -> MapNotation { - avs.into_iter().collect() - }); - let r: ParseResult = p.parse_lazy(&avseq[..]).into(); - r.map(|x| x.0).ok() - }) -} -def_value_satisfy_parser_fn!(Tx, map_notation, MapNotation, value_to_map_notation); - -def_parser_fn!(Tx, entity, Value, Entity, input, { - let mut p = Tx::::add_or_retract() - .or(Tx::::map_notation().map(Entity::MapNotation)); - p.parse_stream(input) -}); - -fn value_to_entities(val: &Value) -> Option> { - val.as_vector().and_then(|vs| { - let mut p = many(Tx::<&[Value]>::entity()) - .skip(eof()); - let r: ParseResult, _> = p.parse_lazy(&vs[..]).into(); - r.ok().map(|x| x.0) - }) -} - -def_value_satisfy_parser_fn!(Tx, entities, Vec, value_to_entities); - -impl<'a> Tx<&'a [edn::Value]> { - pub fn parse(input: &'a [edn::Value]) -> std::result::Result, errors::Error> { - Tx::<_>::entities() - .skip(eof()) - .parse(input) + .parse(input.into_atom_stream()) .map(|x| x.0) - .map_err::(|e| e.translate_position(input).into()) .map_err(|e| Error::from_kind(ErrorKind::ParseError(e))) } -} -impl<'a> Tx<&'a [edn::Value]> { - pub fn parse_entid_or_lookup_ref_or_temp_id(input: &'a [edn::Value]) -> std::result::Result { - Tx::<_>::entid_or_lookup_ref_or_temp_id() + fn parse_entid_or_lookup_ref_or_temp_id(input: edn::ValueAndSpan) -> std::result::Result { + Tx::entid_or_lookup_ref_or_temp_id() .skip(eof()) - .parse(input) + .parse(input.into_atom_stream()) .map(|x| x.0) - .map_err::(|e| e.translate_position(input).into()) .map_err(|e| Error::from_kind(ErrorKind::ParseError(e))) } } @@ -213,7 +165,7 @@ pub fn remove_db_id(map: &mut MapNotation) -> std::result::Result = if let Some(id) = map.remove(&db_id_key) { match id { AtomOrLookupRefOrVectorOrMapNotation::Atom(v) => { - let db_id = Tx::<_>::parse_entid_or_lookup_ref_or_temp_id(&[v][..]) + let db_id = Tx::parse_entid_or_lookup_ref_or_temp_id(v) .chain_err(|| Error::from(ErrorKind::DbIdError))?; Some(db_id) }, @@ -236,7 +188,12 @@ mod tests { use std::collections::LinkedList; use combine::Parser; use edn::symbols::NamespacedKeyword; - use edn::types::Value; + use edn::{ + Span, + SpannedValue, + Value, + ValueAndSpan, + }; use mentat_tx::entities::{ Entid, EntidOrLookupRefOrTempId, @@ -253,108 +210,107 @@ mod tests { #[test] fn test_add() { - let input = [Value::Vector(vec![kw("db", "add"), - kw("test", "entid"), - kw("test", "a"), - Value::Text("v".into())])]; + let input = Value::Vector(vec![kw("db", "add"), + kw("test", "entid"), + kw("test", "a"), + Value::Text("v".into())]).with_spans(); let mut parser = Tx::entity(); - let result = parser.parse(&input[..]); + let result = parser.parse(input.into_atom_stream()).map(|x| x.0); assert_eq!(result, - Ok((Entity::AddOrRetract { + Ok(Entity::AddOrRetract { op: OpType::Add, e: EntidOrLookupRefOrTempId::Entid(Entid::Ident(NamespacedKeyword::new("test", "entid"))), a: Entid::Ident(NamespacedKeyword::new("test", "a")), - v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), - }, - &[][..]))); + v: AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::Text("v".into()), Span(29, 32))), + })); } - #[test] - fn test_retract() { - let input = [Value::Vector(vec![kw("db", "retract"), - Value::Integer(101), - kw("test", "a"), - Value::Text("v".into())])]; - let mut parser = Tx::entity(); - let result = parser.parse(&input[..]); - assert_eq!(result, - Ok((Entity::AddOrRetract { - op: OpType::Retract, - e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(101)), - a: Entid::Ident(NamespacedKeyword::new("test", "a")), - v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), - }, - &[][..]))); - } + // #[test] + // fn test_retract() { + // let input = [Value::Vector(vec![kw("db", "retract"), + // Value::Integer(101), + // kw("test", "a"), + // Value::Text("v".into())])]; + // let mut parser = Tx::entity(); + // let result = parser.parse(&input[..]); + // assert_eq!(result, + // Ok((Entity::AddOrRetract { + // op: OpType::Retract, + // e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(101)), + // a: Entid::Ident(NamespacedKeyword::new("test", "a")), + // v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), + // }, + // &[][..]))); + // } - #[test] - fn test_lookup_ref() { - let mut list = LinkedList::new(); - list.push_back(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))); - list.push_back(kw("test", "a1")); - list.push_back(Value::Text("v1".into())); + // #[test] + // fn test_lookup_ref() { + // let mut list = LinkedList::new(); + // list.push_back(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))); + // list.push_back(kw("test", "a1")); + // list.push_back(Value::Text("v1".into())); - let input = [Value::Vector(vec![kw("db", "add"), - Value::List(list), - kw("test", "a"), - Value::Text("v".into())])]; - let mut parser = Tx::entity(); - let result = parser.parse(&input[..]); - assert_eq!(result, - Ok((Entity::AddOrRetract { - op: OpType::Add, - e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { - a: Entid::Ident(NamespacedKeyword::new("test", "a1")), - v: Value::Text("v1".into()), - }), - a: Entid::Ident(NamespacedKeyword::new("test", "a")), - v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), - }, - &[][..]))); - } + // let input = [Value::Vector(vec![kw("db", "add"), + // Value::List(list), + // kw("test", "a"), + // Value::Text("v".into())])]; + // let mut parser = Tx::entity(); + // let result = parser.parse(&input[..]); + // assert_eq!(result, + // Ok((Entity::AddOrRetract { + // op: OpType::Add, + // e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { + // a: Entid::Ident(NamespacedKeyword::new("test", "a1")), + // v: Value::Text("v1".into()), + // }), + // a: Entid::Ident(NamespacedKeyword::new("test", "a")), + // v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), + // }, + // &[][..]))); + // } - #[test] - fn test_nested_vector() { - let mut list = LinkedList::new(); - list.push_back(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))); - list.push_back(kw("test", "a1")); - list.push_back(Value::Text("v1".into())); + // #[test] + // fn test_nested_vector() { + // let mut list = LinkedList::new(); + // list.push_back(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))); + // list.push_back(kw("test", "a1")); + // list.push_back(Value::Text("v1".into())); - let input = [Value::Vector(vec![kw("db", "add"), - Value::List(list), - kw("test", "a"), - Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])])]; - let mut parser = Tx::entity(); - let result = parser.parse(&input[..]); - assert_eq!(result, - Ok((Entity::AddOrRetract { - op: OpType::Add, - e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { - a: Entid::Ident(NamespacedKeyword::new("test", "a1")), - v: Value::Text("v1".into()), - }), - a: Entid::Ident(NamespacedKeyword::new("test", "a")), - v: AtomOrLookupRefOrVectorOrMapNotation::Vector(vec![AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v1".into())), - AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v2".into()))]), - }, - &[][..]))); - } + // let input = [Value::Vector(vec![kw("db", "add"), + // Value::List(list), + // kw("test", "a"), + // Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])])]; + // let mut parser = Tx::entity(); + // let result = parser.parse(&input[..]); + // assert_eq!(result, + // Ok((Entity::AddOrRetract { + // op: OpType::Add, + // e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { + // a: Entid::Ident(NamespacedKeyword::new("test", "a1")), + // v: Value::Text("v1".into()), + // }), + // a: Entid::Ident(NamespacedKeyword::new("test", "a")), + // v: AtomOrLookupRefOrVectorOrMapNotation::Vector(vec![AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v1".into())), + // AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v2".into()))]), + // }, + // &[][..]))); + // } - #[test] - fn test_map_notation() { - let mut expected: MapNotation = BTreeMap::default(); - expected.insert(Entid::Ident(NamespacedKeyword::new("db", "id")), AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("t".to_string()))); - expected.insert(Entid::Ident(NamespacedKeyword::new("db", "ident")), AtomOrLookupRefOrVectorOrMapNotation::Atom(kw("test", "attribute"))); + // #[test] + // fn test_map_notation() { + // let mut expected: MapNotation = BTreeMap::default(); + // expected.insert(Entid::Ident(NamespacedKeyword::new("db", "id")), AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("t".to_string()))); + // expected.insert(Entid::Ident(NamespacedKeyword::new("db", "ident")), AtomOrLookupRefOrVectorOrMapNotation::Atom(kw("test", "attribute"))); - let mut map: BTreeMap = BTreeMap::default(); - map.insert(kw("db", "id"), Value::Text("t".to_string())); - map.insert(kw("db", "ident"), kw("test", "attribute")); - let input = [Value::Map(map.clone())]; - let mut parser = Tx::entity(); - let result = parser.parse(&input[..]); - assert_eq!(result, - Ok((Entity::MapNotation(expected), - &[][..]))); - } + // let mut map: BTreeMap = BTreeMap::default(); + // map.insert(kw("db", "id"), Value::Text("t".to_string())); + // map.insert(kw("db", "ident"), kw("test", "attribute")); + // let input = [Value::Map(map.clone())]; + // let mut parser = Tx::entity(); + // let result = parser.parse(&input[..]); + // assert_eq!(result, + // Ok((Entity::MapNotation(expected), + // &[][..]))); + // } } diff --git a/tx-parser/tests/parser.rs b/tx-parser/tests/parser.rs index ed1210c5..7abd2819 100644 --- a/tx-parser/tests/parser.rs +++ b/tx-parser/tests/parser.rs @@ -15,7 +15,6 @@ extern crate mentat_tx_parser; use edn::parse; use edn::symbols::NamespacedKeyword; -use edn::types::Value; use mentat_tx::entities::{ AtomOrLookupRefOrVectorOrMapNotation, Entid, @@ -33,29 +32,28 @@ fn test_entities() { [:db/add "tempid" :test/a "v"] [:db/retract 102 :test/b "w"]]"#; - let edn = parse::value(input).unwrap().without_spans(); - let input = [edn]; + let edn = parse::value(input).expect("to parse test input"); - let result = Tx::parse(&input[..]); + let result = Tx::parse(edn); assert_eq!(result.unwrap(), vec![ Entity::AddOrRetract { op: OpType::Add, e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(101)), a: Entid::Ident(NamespacedKeyword::new("test", "a")), - v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), + v: AtomOrLookupRefOrVectorOrMapNotation::Atom(edn::ValueAndSpan::new(edn::SpannedValue::Text("v".into()), edn::Span(23, 26))), }, Entity::AddOrRetract { op: OpType::Add, e: EntidOrLookupRefOrTempId::TempId(TempId::External("tempid".into())), a: Entid::Ident(NamespacedKeyword::new("test", "a")), - v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), + v: AtomOrLookupRefOrVectorOrMapNotation::Atom(edn::ValueAndSpan::new(edn::SpannedValue::Text("v".into()), edn::Span(55, 58))), }, Entity::AddOrRetract { op: OpType::Retract, e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(102)), a: Entid::Ident(NamespacedKeyword::new("test", "b")), - v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("w".into())), + v: AtomOrLookupRefOrVectorOrMapNotation::Atom(edn::ValueAndSpan::new(edn::SpannedValue::Text("w".into()), edn::Span(86, 89))), }, ]); } diff --git a/tx/src/entities.rs b/tx/src/entities.rs index 094edea5..a943e49c 100644 --- a/tx/src/entities.rs +++ b/tx/src/entities.rs @@ -15,7 +15,6 @@ extern crate edn; use std::collections::BTreeMap; use std::fmt; -use self::edn::types::Value; use self::edn::symbols::NamespacedKeyword; /// A tempid, either an external tempid given in a transaction (usually as an `edn::Value::Text`), @@ -62,14 +61,14 @@ pub struct LookupRef { pub a: Entid, // In theory we could allow nested lookup-refs. In practice this would require us to process // lookup-refs in multiple phases, like how we resolve tempids, which isn't worth the effort. - pub v: Value, // An atom. + pub v: edn::Value, // An atom. } pub type MapNotation = BTreeMap; #[derive(Clone, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] pub enum AtomOrLookupRefOrVectorOrMapNotation { - Atom(Value), + Atom(edn::ValueAndSpan), LookupRef(LookupRef), Vector(Vec), MapNotation(MapNotation), From ff136b254681133b0e1ad15cec0cf0023f72ffc6 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 28 Mar 2017 18:04:59 -0700 Subject: [PATCH 07/18] Part 3: Use `value_and_span` apparatus in query-parser/. --- query-parser/src/find.rs | 261 ------------- query-parser/src/lib.rs | 9 +- query-parser/src/parse.rs | 611 ++++++++++++++++--------------- query-parser/src/util.rs | 151 -------- query-parser/tests/find_tests.rs | 2 +- query/src/lib.rs | 61 +-- 6 files changed, 346 insertions(+), 749 deletions(-) delete mode 100644 query-parser/src/find.rs delete mode 100644 query-parser/src/util.rs diff --git a/query-parser/src/find.rs b/query-parser/src/find.rs deleted file mode 100644 index f3c1c22a..00000000 --- a/query-parser/src/find.rs +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2016 Mozilla -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use -// this file except in compliance with the License. You may obtain a copy of the -// License at http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -/// ! This module defines the interface and implementation for parsing an EDN -/// ! input into a structured Datalog query. -/// ! -/// ! The query types are defined in the `query` crate, because they -/// ! are shared between the parser (EDN -> query), the translator -/// ! (query -> SQL), and the executor (query, SQL -> running code). -/// ! -/// ! The query input can be in two forms: a 'flat' human-oriented -/// ! sequence: -/// ! -/// ! ```clojure -/// ! [:find ?y :in $ ?x :where [?x :foaf/knows ?y]] -/// ! ``` -/// ! -/// ! or a more programmatically generable map: -/// ! -/// ! ```clojure -/// ! {:find [?y] -/// ! :in [$] -/// ! :where [[?x :foaf/knows ?y]]} -/// ! ``` -/// ! -/// ! We parse by expanding the array format into four parts, treating them as the four -/// ! parts of the map. - -extern crate edn; -extern crate mentat_parser_utils; -extern crate mentat_query; - -use std::collections::BTreeMap; - -use self::mentat_query::{ - FindQuery, - FnArg, - FromValue, - Predicate, - PredicateFn, - SrcVar, - Variable, -}; - -use self::mentat_parser_utils::ValueParseError; - -use super::parse::{ - ErrorKind, - QueryParseResult, - Result, - clause_seq_to_patterns, -}; - -use super::util::vec_to_keyword_map; - -/// If the provided slice of EDN values are all variables as -/// defined by `value_to_variable`, return a `Vec` of `Variable`s. -/// Otherwise, return the unrecognized Value in a `NotAVariableError`. -fn values_to_variables(vals: &[edn::Value]) -> Result> { - let mut out: Vec = Vec::with_capacity(vals.len()); - for v in vals { - if let Some(var) = Variable::from_value(v) { - out.push(var); - continue; - } - bail!(ErrorKind::NotAVariableError(v.clone())); - } - return Ok(out); -} - -#[allow(unused_variables)] -fn parse_find_parts(find: &[edn::Value], - ins: Option<&[edn::Value]>, - with: Option<&[edn::Value]>, - wheres: &[edn::Value]) - -> QueryParseResult { - // :find must be an array of plain var symbols (?foo), pull expressions, and aggregates. - // For now we only support variables and the annotations necessary to declare which - // flavor of :find we want: - // ?x ?y ?z = FindRel - // [?x ...] = FindColl - // ?x . = FindScalar - // [?x ?y ?z] = FindTuple - // - // :in must be an array of sources ($), rules (%), and vars (?). For now we only support the - // default source. :in can be omitted, in which case the default is equivalent to `:in $`. - // TODO: process `ins`. - let source = SrcVar::DefaultSrc; - - // :with is an array of variables. This is simple, so we don't use a parser. - let with_vars = if let Some(vals) = with { - values_to_variables(vals)? - } else { - vec![] - }; - - // :wheres is a whole datastructure. - let where_clauses = clause_seq_to_patterns(wheres)?; - - super::parse::find_seq_to_find_spec(find) - .map(|spec| { - FindQuery { - find_spec: spec, - default_source: source, - with: with_vars, - in_vars: vec![], // TODO - in_sources: vec![], // TODO - where_clauses: where_clauses, - } - }) -} - -fn parse_find_map(map: BTreeMap>) -> QueryParseResult { - // Eagerly awaiting `const fn`. - let kw_find = edn::Keyword::new("find"); - let kw_in = edn::Keyword::new("in"); - let kw_with = edn::Keyword::new("with"); - let kw_where = edn::Keyword::new("where"); - - // Oh, if only we had `guard`. - if let Some(find) = map.get(&kw_find) { - if let Some(wheres) = map.get(&kw_where) { - parse_find_parts(find, - map.get(&kw_in).map(|x| x.as_slice()), - map.get(&kw_with).map(|x| x.as_slice()), - wheres) - } else { - bail!(ErrorKind::MissingFieldError(kw_where)) - } - } else { - bail!(ErrorKind::MissingFieldError(kw_find)) - } -} - -fn parse_find_edn_map(map: BTreeMap) -> QueryParseResult { - // Every key must be a Keyword. Every value must be a Vec. - let mut m = BTreeMap::new(); - - if map.is_empty() { - return parse_find_map(m); - } - - for (k, v) in map { - if let edn::Value::Keyword(kw) = k { - if let edn::Value::Vector(vec) = v { - m.insert(kw, vec); - continue; - } else { - bail!(ErrorKind::InvalidInputError(v)) - } - } else { - bail!(ErrorKind::InvalidInputError(k)) - } - } - - parse_find_map(m) -} - -pub fn parse_find_string(string: &str) -> QueryParseResult { - let expr = edn::parse::value(string)?; - parse_find(expr.without_spans()) -} - -pub fn parse_find(expr: edn::Value) -> QueryParseResult { - // No `match` because scoping and use of `expr` in error handling is nuts. - if let edn::Value::Map(m) = expr { - return parse_find_edn_map(m); - } - if let edn::Value::Vector(ref v) = expr { - if let Some(m) = vec_to_keyword_map(v) { - return parse_find_map(m); - } - } - bail!(ErrorKind::InvalidInputError(expr)) -} - -#[cfg(test)] -mod test_parse { - extern crate edn; - - use std::rc::Rc; - - use self::edn::{NamespacedKeyword, PlainSymbol}; - use self::edn::types::Value; - use super::mentat_query::{ - Element, - FindSpec, - Pattern, - PatternNonValuePlace, - PatternValuePlace, - SrcVar, - Variable, - WhereClause, - }; - use super::*; - - // TODO: when #224 lands, fix to_keyword to be variadic. - #[test] - fn test_parse_find() { - let truncated_input = edn::Value::Vector(vec![Value::from_keyword(None, "find")]); - assert!(parse_find(truncated_input).is_err()); - - let input = - edn::Value::Vector(vec![Value::from_keyword(None, "find"), - Value::from_symbol(None, "?x"), - Value::from_symbol(None, "?y"), - Value::from_keyword(None, "where"), - edn::Value::Vector(vec![Value::from_symbol(None, "?x"), - Value::from_keyword("foo", "bar"), - Value::from_symbol(None, "?y")])]); - - let parsed = parse_find(input).unwrap(); - if let FindSpec::FindRel(elems) = parsed.find_spec { - assert_eq!(2, elems.len()); - assert_eq!(vec![Element::Variable(Variable::from_valid_name("?x")), - Element::Variable(Variable::from_valid_name("?y"))], - elems); - } else { - panic!("Expected FindRel."); - } - - assert_eq!(SrcVar::DefaultSrc, parsed.default_source); - assert_eq!(parsed.where_clauses, - vec![ - WhereClause::Pattern(Pattern { - source: None, - entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")), - attribute: PatternNonValuePlace::Ident(Rc::new(NamespacedKeyword::new("foo", "bar"))), - value: PatternValuePlace::Variable(Variable::from_valid_name("?y")), - tx: PatternNonValuePlace::Placeholder, - })]); - } - - #[test] - fn test_parse_predicate() { - let input = "[:find ?x :where [?x :foo/bar ?y] [[< ?y 10]]]"; - let parsed = parse_find_string(input).unwrap(); - assert_eq!(parsed.where_clauses, - vec![ - WhereClause::Pattern(Pattern { - source: None, - entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")), - attribute: PatternNonValuePlace::Ident(Rc::new(NamespacedKeyword::new("foo", "bar"))), - value: PatternValuePlace::Variable(Variable::from_valid_name("?y")), - tx: PatternNonValuePlace::Placeholder, - }), - WhereClause::Pred(Predicate { - operator: PlainSymbol::new("<"), - args: vec![FnArg::Variable(Variable::from_valid_name("?y")), - FnArg::EntidOrInteger(10)], - }), - ]); - } -} diff --git a/query-parser/src/lib.rs b/query-parser/src/lib.rs index 4a24843a..8158d279 100644 --- a/query-parser/src/lib.rs +++ b/query-parser/src/lib.rs @@ -21,19 +21,12 @@ extern crate edn; #[macro_use] extern crate mentat_parser_utils; -mod util; mod parse; -pub mod find; - -pub use find::{ - parse_find, - parse_find_string, -}; pub use parse::{ Error, ErrorKind, - QueryParseResult, Result, ResultExt, + parse_find_string, }; diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 21305ca9..1ba01b98 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -13,14 +13,27 @@ extern crate edn; extern crate mentat_parser_utils; extern crate mentat_query; -use self::combine::{eof, many, many1, optional, parser, satisfy_map, Parser, ParseResult, Stream}; -use self::combine::combinator::{choice, try}; +use std; // To refer to std::result::Result. + +use self::combine::{eof, many, many1, optional, parser, satisfy, satisfy_map, Parser, ParseResult, Stream}; +use self::combine::combinator::{choice, or, try}; use self::mentat_parser_utils::{ ResultParser, ValueParseError, }; +use self::mentat_parser_utils::value_and_span::Stream as ValueStream; +use self::mentat_parser_utils::value_and_span::{ + Item, + OfParsing, + keyword_map, + list, + map, + seq, + vector, +}; + use self::mentat_query::{ Element, FindQuery, @@ -50,17 +63,17 @@ error_chain! { } errors { - NotAVariableError(value: edn::Value) { + NotAVariableError(value: edn::ValueAndSpan) { description("not a variable") display("not a variable: '{}'", value) } - FindParseError(e: ValueParseError) { + FindParseError(e: combine::ParseError) { description(":find parse error") display(":find parse error") } - WhereParseError(e: ValueParseError) { + WhereParseError(e: combine::ParseError) { description(":where parse error") display(":where parse error") } @@ -83,321 +96,349 @@ error_chain! { } } -pub type WhereParseResult = Result>; -pub type FindParseResult = Result; -pub type QueryParseResult = Result; +pub struct Query; -pub struct Query(::std::marker::PhantomData I>); +def_parser!(Query, variable, Variable, { + satisfy_map(Variable::from_value) +}); -impl Query - where I: Stream -{ - fn to_parsed_value(r: ParseResult) -> Option { - r.ok().map(|x| x.0) - } -} +def_parser!(Query, source_var, SrcVar, { + satisfy_map(SrcVar::from_value) +}); // TODO: interning. -def_value_satisfy_parser_fn!(Query, variable, Variable, Variable::from_value); -def_value_satisfy_parser_fn!(Query, source_var, SrcVar, SrcVar::from_value); -def_value_satisfy_parser_fn!(Query, predicate_fn, PredicateFn, PredicateFn::from_value); -def_value_satisfy_parser_fn!(Query, fn_arg, FnArg, FnArg::from_value); +def_parser!(Query, predicate_fn, PredicateFn, { + satisfy_map(PredicateFn::from_value) +}); -pub struct Where(::std::marker::PhantomData I>); +def_parser!(Query, fn_arg, FnArg, { + satisfy_map(FnArg::from_value) +}); -def_value_satisfy_parser_fn!(Where, - pattern_value_place, - PatternValuePlace, - PatternValuePlace::from_value); -def_value_satisfy_parser_fn!(Where, - pattern_non_value_place, - PatternNonValuePlace, - PatternNonValuePlace::from_value); +def_parser!(Query, arguments, Vec, { + (many::, _>(Query::fn_arg())) +}); -fn seq>>(x: T) -> Option> { - match x.into() { - Some(edn::Value::List(items)) => Some(items.into_iter().collect()), - Some(edn::Value::Vector(items)) => Some(items), - _ => None, - } -} +pub struct Where; -/// Take a vector Value containing one vector Value, and return the `Vec` inside the inner vector. -/// Also accepts an inner list, returning it as a `Vec`. -fn unwrap_nested(x: edn::Value) -> Option> { - match x { - edn::Value::Vector(mut v) => { - seq(v.pop()) +def_parser!(Where, pattern_value_place, PatternValuePlace, { + satisfy_map(PatternValuePlace::from_value) +}); + +def_parser!(Where, pattern_non_value_place, PatternNonValuePlace, { + satisfy_map(PatternNonValuePlace::from_value) +}); + + +def_parser!(Where, and, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "and" + } else { + false } - _ => None, - } -} - -def_value_parser_fn!(Where, and, (), input, { - matches_plain_symbol!("and", input) + }) }); -def_value_parser_fn!(Where, or, (), input, { - matches_plain_symbol!("or", input) +def_parser!(Where, or, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "or" + } else { + false + } + }) }); -def_value_parser_fn!(Where, or_join, (), input, { - matches_plain_symbol!("or-join", input) +def_parser!(Where, or_join, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "or-join" + } else { + false + } + }) }); -def_value_parser_fn!(Where, rule_vars, Vec, input, { - satisfy_map(|x: edn::Value| { - seq(x).and_then(|items| { - let mut p = many1(Query::variable()).skip(eof()); - Query::to_parsed_value(p.parse_lazy(&items[..]).into()) - })}).parse_stream(input) +def_parser!(Where, rule_vars, Vec, { + seq() + .of(many1(Query::variable())) }); -def_value_parser_fn!(Where, or_pattern_clause, OrWhereClause, input, { - Where::clause().map(|clause| OrWhereClause::Clause(clause)).parse_stream(input) +def_parser!(Where, or_pattern_clause, OrWhereClause, { + Where::clause().map(|clause| OrWhereClause::Clause(clause)) }); -def_value_parser_fn!(Where, or_and_clause, OrWhereClause, input, { - satisfy_map(|x: edn::Value| { - seq(x).and_then(|items| { - let mut p = Where::and() - .with(many1(Where::clause())) - .skip(eof()) - .map(OrWhereClause::And); - let r: ParseResult = p.parse_lazy(&items[..]).into(); - Query::to_parsed_value(r) - }) - }).parse_stream(input) +def_parser!(Where, or_and_clause, OrWhereClause, { + seq() + .of(Where::and() + .with(many1(Where::clause())) + .map(OrWhereClause::And)) }); -def_value_parser_fn!(Where, or_where_clause, OrWhereClause, input, { - choice([Where::or_pattern_clause(), Where::or_and_clause()]).parse_stream(input) +def_parser!(Where, or_where_clause, OrWhereClause, { + choice([Where::or_pattern_clause(), Where::or_and_clause()]) }); -def_value_parser_fn!(Where, or_clause, WhereClause, input, { - satisfy_map(|x: edn::Value| { - seq(x).and_then(|items| { - let mut p = Where::or() - .with(many1(Where::or_where_clause())) - .skip(eof()) - .map(|clauses| { - WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: clauses, - }) - }); - let r: ParseResult = p.parse_lazy(&items[..]).into(); - Query::to_parsed_value(r) - }) - }).parse_stream(input) +def_parser!(Where, or_clause, WhereClause, { + seq() + .of(Where::or() + .with(many1(Where::or_where_clause())) + .map(|clauses| { + WhereClause::OrJoin( + OrJoin { + unify_vars: UnifyVars::Implicit, + clauses: clauses, + }) + })) }); -def_value_parser_fn!(Where, or_join_clause, WhereClause, input, { - satisfy_map(|x: edn::Value| { - seq(x).and_then(|items| { - let mut p = Where::or_join() - .with(Where::rule_vars()) - .and(many1(Where::or_where_clause())) - .skip(eof()) - .map(|(vars, clauses)| { - WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Explicit(vars), - clauses: clauses, - }) - }); - let r: ParseResult = p.parse_lazy(&items[..]).into(); - Query::to_parsed_value(r) - }) - }).parse_stream(input) +def_parser!(Where, or_join_clause, WhereClause, { + seq() + .of(Where::or_join() + .with(Where::rule_vars()) + .and(many1(Where::or_where_clause())) + .map(|(vars, clauses)| { + WhereClause::OrJoin( + OrJoin { + unify_vars: UnifyVars::Explicit(vars), + clauses: clauses, + }) + })) }); /// A vector containing just a parenthesized filter expression. -def_value_parser_fn!(Where, pred, WhereClause, input, { - satisfy_map(|x: edn::Value| { - // Accept either a list or a vector here: - // `[(foo ?x ?y)]` or `[[foo ?x ?y]]` - unwrap_nested(x).and_then(|items| { - let mut p = (Query::predicate_fn(), Query::arguments()) - .skip(eof()) - .map(|(f, args)| { - WhereClause::Pred( - Predicate { - operator: f.0, - args: args, - }) - }); - let r: ParseResult = p.parse_lazy(&items[..]).into(); - Query::to_parsed_value(r) - }) - }).parse_stream(input) +def_parser!(Where, pred, WhereClause, { + // Accept either a nested list or a nested vector here: + // `[(foo ?x ?y)]` or `[[foo ?x ?y]]` + vector() + .of(seq() + .of((Query::predicate_fn(), Query::arguments()) + .map(|(f, args)| { + WhereClause::Pred( + Predicate { + operator: f.0, + args: args, + }) + }))) }); -def_value_parser_fn!(Where, pattern, WhereClause, input, { - satisfy_map(|x: edn::Value| { - if let edn::Value::Vector(y) = x { +def_parser!(Where, pattern, WhereClause, { + vector() + .of( // While *technically* Datomic allows you to have a query like: // [:find … :where [[?x]]] // We don't -- we require at least e, a. - let mut p = (optional(Query::source_var()), // src - Where::pattern_non_value_place(), // e - Where::pattern_non_value_place(), // a - optional(Where::pattern_value_place()), // v - optional(Where::pattern_non_value_place())) // tx - .skip(eof()) - .map(|(src, e, a, v, tx)| { - let v = v.unwrap_or(PatternValuePlace::Placeholder); - let tx = tx.unwrap_or(PatternNonValuePlace::Placeholder); + (optional(Query::source_var()), // src + Where::pattern_non_value_place(), // e + Where::pattern_non_value_place(), // a + optional(Where::pattern_value_place()), // v + optional(Where::pattern_non_value_place())) // tx + .and_then(|(src, e, a, v, tx)| { + let v = v.unwrap_or(PatternValuePlace::Placeholder); + let tx = tx.unwrap_or(PatternNonValuePlace::Placeholder); - // Pattern::new takes care of reversal of reversed - // attributes: [?x :foo/_bar ?y] turns into - // [?y :foo/bar ?x]. - Pattern::new(src, e, a, v, tx).map(WhereClause::Pattern) - }); - - // This is a bit messy: the inner conversion to a Pattern can - // fail if the input is something like - // - // ```edn - // [?x :foo/_reversed 23.4] - // ``` - // - // because - // - // ```edn - // [23.4 :foo/reversed ?x] - // ``` - // - // is nonsense. That leaves us with nested optionals; we unwrap them here. - let r: ParseResult, _> = p.parse_lazy(&y[..]).into(); - let v: Option> = Query::to_parsed_value(r); - v.unwrap_or(None) - } else { - None - } - }).parse_stream(input) + // Pattern::new takes care of reversal of reversed + // attributes: [?x :foo/_bar ?y] turns into + // [?y :foo/bar ?x]. + // + // This is a bit messy: the inner conversion to a Pattern can + // fail if the input is something like + // + // ```edn + // [?x :foo/_reversed 23.4] + // ``` + // + // because + // + // ```edn + // [23.4 :foo/reversed ?x] + // ``` + // + // is nonsense. That leaves us with a nested optional, which we unwrap here. + Pattern::new(src, e, a, v, tx) + .map(WhereClause::Pattern) + .ok_or(combine::primitives::Error::Expected("pattern".into())) + })) }); -def_value_parser_fn!(Query, arguments, Vec, input, { - (many::, _>(Query::fn_arg())) - .skip(eof()) - .parse_stream(input) -}); - -def_value_parser_fn!(Where, clause, WhereClause, input, { - choice([Where::pattern(), - Where::pred(), +def_parser!(Where, clause, WhereClause, { + choice([try(Where::pattern()), // It's either // (or-join [vars] clauses…) // or // (or clauses…) // We don't yet handle source vars. - Where::or_join_clause(), - Where::or_clause(), - ]).parse_stream(input) + try(Where::or_join_clause()), + try(Where::or_clause()), + + try(Where::pred()), + ]) }); -def_value_parser_fn!(Where, clauses, Vec, input, { +def_parser!(Where, clauses, Vec, { // Right now we only support patterns and predicates. See #239 for more. (many1::, _>(Where::clause())) - .skip(eof()) - .parse_stream(input) }); -pub struct Find(::std::marker::PhantomData I>); +pub struct Find; -def_value_parser_fn!(Find, period, (), input, { - matches_plain_symbol!(".", input) +/// TODO: extract macro for matching these `PlainSymbol` instances. +def_parser!(Find, period, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "." + } else { + false + } + }) }); -def_value_parser_fn!(Find, ellipsis, (), input, { - matches_plain_symbol!("...", input) +def_parser!(Find, ellipsis, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "..." + } else { + false + } + }) }); -def_value_parser_fn!(Find, find_scalar, FindSpec, input, { +def_parser!(Find, find_scalar, FindSpec, { Query::variable() + .map(|var| FindSpec::FindScalar(Element::Variable(var))) .skip(Find::period()) .skip(eof()) - .map(|var| FindSpec::FindScalar(Element::Variable(var))) - .parse_stream(input) }); -def_value_parser_fn!(Find, find_coll, FindSpec, input, { - satisfy_unwrap!(edn::Value::Vector, y, { - let mut p = Query::variable() - .skip(Find::ellipsis()) - .skip(eof()) - .map(|var| FindSpec::FindColl(Element::Variable(var))); - let r: ParseResult = p.parse_lazy(&y[..]).into(); - Query::to_parsed_value(r) - }) - .parse_stream(input) +def_parser!(Find, find_coll, FindSpec, { + vector() + .of(Query::variable() + .map(|var| FindSpec::FindColl(Element::Variable(var))) + .skip(Find::ellipsis())) }); -def_value_parser_fn!(Find, elements, Vec, input, { - many1::, _>(Query::variable()).skip(eof()) - .map(|vars| { - vars.into_iter() - .map(Element::Variable) - .collect() - }) - .parse_stream(input) +def_parser!(Find, elements, Vec, { + many1::, _>(Query::variable().map(Element::Variable)) }); -def_value_parser_fn!(Find, find_rel, FindSpec, input, { - Find::elements().map(FindSpec::FindRel).parse_stream(input) +def_parser!(Find, find_rel, FindSpec, { + Find::elements().map(FindSpec::FindRel) }); -def_value_parser_fn!(Find, find_tuple, FindSpec, input, { - satisfy_unwrap!(edn::Value::Vector, y, { - let r: ParseResult = - Find::elements().map(FindSpec::FindTuple).parse_lazy(&y[..]).into(); - Query::to_parsed_value(r) - }) - .parse_stream(input) +def_parser!(Find, find_tuple, FindSpec, { + vector() + .of(Find::elements().map(FindSpec::FindTuple)) }); -def_value_parser_fn!(Find, find, FindSpec, input, { - // Any one of the four specs might apply, so we combine them with `choice`. - // Our parsers consume input, so we need to wrap them in `try` so that they - // operate independently. - choice::<[&mut Parser; 4], _> +/// Parse a stream values into one of four find specs. +/// +/// `:find` must be an array of plain var symbols (?foo), pull expressions, and aggregates. For now +/// we only support variables and the annotations necessary to declare which flavor of :find we +/// want: +/// +/// +/// `?x ?y ?z ` = FindRel +/// `[?x ...] ` = FindColl +/// `?x . ` = FindScalar +/// `[?x ?y ?z]` = FindTuple +def_parser!(Find, spec, FindSpec, { + // Any one of the four specs might apply, so we combine them with `choice`. Our parsers consume + // input, so we need to wrap them in `try` so that they operate independently. + choice::<[&mut Parser; 4], _> ([&mut try(Find::find_scalar()), &mut try(Find::find_coll()), &mut try(Find::find_tuple()), &mut try(Find::find_rel())]) - .parse_stream(input) }); -// Parse a sequence of values into one of four find specs. -// -// `:find` must be an array of plain var symbols (?foo), pull expressions, and aggregates. -// For now we only support variables and the annotations necessary to declare which -// flavor of :find we want: -// -// -// `?x ?y ?z ` = FindRel -// `[?x ...] ` = FindColl -// `?x . ` = FindScalar -// `[?x ?y ?z]` = FindTuple -// -#[allow(dead_code)] -pub fn find_seq_to_find_spec(find: &[edn::Value]) -> FindParseResult { - Find::find() - .parse(find) - .map(|x| x.0) - .map_err::(|e| e.translate_position(find).into()) - .map_err(|e| Error::from_kind(ErrorKind::FindParseError(e))) +/// TODO: extract macro for matching these `Keyword` instances. +def_parser!(Find, literal_find, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::Keyword(ref s) = v.inner { + s.0.as_str() == "find" + } else { + false + } + }) +}); + +def_parser!(Find, literal_with, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::Keyword(ref s) = v.inner { + s.0.as_str() == "with" + } else { + false + } + }) +}); + +def_parser!(Find, literal_where, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::Keyword(ref s) = v.inner { + s.0.as_str() == "where" + } else { + false + } + }) +}); + +/// Express something close to a builder pattern for a `FindQuery`. +enum FindQueryPart { + FindSpec(FindSpec), + With(Vec), + WhereClauses(Vec), } -#[allow(dead_code)] -pub fn clause_seq_to_patterns(clauses: &[edn::Value]) -> WhereParseResult { - Where::clauses() - .parse(clauses) +/// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find +/// queries, then we use `FindQueryPart` to collect parts that have heterogeneous types; and then we +/// construct a `FindQuery` from them. +def_parser!(Find, query, FindQuery, { + let p_find_spec = Find::literal_find() + .with(vector().of(Find::spec().map(FindQueryPart::FindSpec))); + + let p_with_vars = Find::literal_with() + .with(vector().of(many(Query::variable()).map(FindQueryPart::With))); + + let p_where_clauses = Find::literal_where() + .with(vector().of(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses"); + + (or(map(), keyword_map())) + .of(many(choice::<[&mut Parser; 3], _>([ + &mut try(p_find_spec), + &mut try(p_with_vars), + &mut try(p_where_clauses), + ]))) + .and_then(|parts: Vec| -> std::result::Result> { + let mut find_spec = None; + let mut with_vars = None; + let mut where_clauses = None; + + for part in parts { + match part { + FindQueryPart::FindSpec(x) => find_spec = Some(x), + FindQueryPart::With(x) => with_vars = Some(x), + FindQueryPart::WhereClauses(x) => where_clauses = Some(x), + } + } + + Ok(FindQuery { + find_spec: find_spec.clone().ok_or(combine::primitives::Error::Unexpected("expected :find".into()))?, + default_source: SrcVar::DefaultSrc, + with: with_vars.unwrap_or(vec![]), + in_vars: vec![], // TODO + in_sources: vec![], // TODO + where_clauses: where_clauses.ok_or(combine::primitives::Error::Unexpected("expected :where".into()))?, + }) + }) +}); + +pub fn parse_find_string(string: &str) -> Result { + let expr = edn::parse::value(string)?; + Find::query() + .parse(expr.into_atom_stream()) .map(|x| x.0) - .map_err::(|e| e.translate_position(clauses).into()) - .map_err(|e| Error::from_kind(ErrorKind::WhereParseError(e))) + .map_err(|e| Error::from_kind(ErrorKind::FindParseError(e))) } #[cfg(test)] @@ -441,10 +482,10 @@ mod test { let a = edn::NamespacedKeyword::new("foo", "bar"); let v = OrderedFloat(99.9); let tx = edn::PlainSymbol::new("?tx"); - let input = [edn::Value::Vector(vec!(edn::Value::PlainSymbol(e.clone()), - edn::Value::NamespacedKeyword(a.clone()), - edn::Value::Float(v.clone()), - edn::Value::PlainSymbol(tx.clone())))]; + let input = edn::Value::Vector(vec!(edn::Value::PlainSymbol(e.clone()), + edn::Value::NamespacedKeyword(a.clone()), + edn::Value::Float(v.clone()), + edn::Value::PlainSymbol(tx.clone()))); assert_parses_to!(Where::pattern, input, WhereClause::Pattern(Pattern { source: None, entity: PatternNonValuePlace::Placeholder, @@ -461,11 +502,11 @@ mod test { let a = edn::PlainSymbol::new("?a"); let v = edn::PlainSymbol::new("?v"); let tx = edn::PlainSymbol::new("?tx"); - let input = [edn::Value::Vector(vec!(edn::Value::PlainSymbol(s.clone()), + let input = edn::Value::Vector(vec!(edn::Value::PlainSymbol(s.clone()), edn::Value::PlainSymbol(e.clone()), edn::Value::PlainSymbol(a.clone()), edn::Value::PlainSymbol(v.clone()), - edn::Value::PlainSymbol(tx.clone())))]; + edn::Value::PlainSymbol(tx.clone()))); assert_parses_to!(Where::pattern, input, WhereClause::Pattern(Pattern { source: Some(SrcVar::NamedSrc("x".to_string())), entity: PatternNonValuePlace::Variable(variable(e)), @@ -481,13 +522,13 @@ mod test { let a = edn::NamespacedKeyword::new("foo", "_bar"); let v = OrderedFloat(99.9); let tx = edn::PlainSymbol::new("?tx"); - let input = [edn::Value::Vector(vec!(edn::Value::PlainSymbol(e.clone()), + let input = edn::Value::Vector(vec!(edn::Value::PlainSymbol(e.clone()), edn::Value::NamespacedKeyword(a.clone()), edn::Value::Float(v.clone()), - edn::Value::PlainSymbol(tx.clone())))]; + edn::Value::PlainSymbol(tx.clone()))); let mut par = Where::pattern(); - let result = par.parse(&input[..]); + let result = par.parse(input.with_spans().into_atom_stream()); assert!(matches!(result, Err(_)), "Expected a parse error."); } @@ -497,10 +538,10 @@ mod test { let a = edn::NamespacedKeyword::new("foo", "_bar"); let v = edn::PlainSymbol::new("?v"); let tx = edn::PlainSymbol::new("?tx"); - let input = [edn::Value::Vector(vec!(edn::Value::PlainSymbol(e.clone()), + let input = edn::Value::Vector(vec!(edn::Value::PlainSymbol(e.clone()), edn::Value::NamespacedKeyword(a.clone()), edn::Value::PlainSymbol(v.clone()), - edn::Value::PlainSymbol(tx.clone())))]; + edn::Value::PlainSymbol(tx.clone()))); // Note that the attribute is no longer reversed, and the entity and value have // switched places. @@ -516,7 +557,7 @@ mod test { #[test] fn test_rule_vars() { let e = edn::PlainSymbol::new("?e"); - let input = [edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone())])]; + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone())]); assert_parses_to!(Where::rule_vars, input, vec![variable(e.clone())]); } @@ -527,11 +568,11 @@ mod test { let e = edn::PlainSymbol::new("?e"); let a = edn::PlainSymbol::new("?a"); let v = edn::PlainSymbol::new("?v"); - let input = [edn::Value::List( + let input = edn::Value::List( vec![edn::Value::PlainSymbol(oj), edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), edn::Value::PlainSymbol(a.clone()), - edn::Value::PlainSymbol(v.clone())])].into_iter().collect())]; + edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_clause, input, WhereClause::OrJoin( OrJoin { @@ -553,12 +594,12 @@ mod test { let e = edn::PlainSymbol::new("?e"); let a = edn::PlainSymbol::new("?a"); let v = edn::PlainSymbol::new("?v"); - let input = [edn::Value::List( + let input = edn::Value::List( vec![edn::Value::PlainSymbol(oj), edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone())]), edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), edn::Value::PlainSymbol(a.clone()), - edn::Value::PlainSymbol(v.clone())])].into_iter().collect())]; + edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_join_clause, input, WhereClause::OrJoin( OrJoin { @@ -577,16 +618,16 @@ mod test { #[test] fn test_find_sp_variable() { let sym = edn::PlainSymbol::new("?x"); - let input = [edn::Value::PlainSymbol(sym.clone())]; - assert_parses_to!(Query::variable, input, variable(sym)); + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone())]); + assert_parses_to!(|| vector().of(Query::variable()), input, variable(sym)); } #[test] fn test_find_scalar() { let sym = edn::PlainSymbol::new("?x"); let period = edn::PlainSymbol::new("."); - let input = [edn::Value::PlainSymbol(sym.clone()), edn::Value::PlainSymbol(period.clone())]; - assert_parses_to!(Find::find_scalar, + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone()), edn::Value::PlainSymbol(period.clone())]); + assert_parses_to!(|| vector().of(Find::find_scalar()), input, FindSpec::FindScalar(Element::Variable(variable(sym)))); } @@ -595,8 +636,8 @@ mod test { fn test_find_coll() { let sym = edn::PlainSymbol::new("?x"); let period = edn::PlainSymbol::new("..."); - let input = [edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone()), - edn::Value::PlainSymbol(period.clone())])]; + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone()), + edn::Value::PlainSymbol(period.clone())]); assert_parses_to!(Find::find_coll, input, FindSpec::FindColl(Element::Variable(variable(sym)))); @@ -606,8 +647,8 @@ mod test { fn test_find_rel() { let vx = edn::PlainSymbol::new("?x"); let vy = edn::PlainSymbol::new("?y"); - let input = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())]; - assert_parses_to!(Find::find_rel, + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())]); + assert_parses_to!(|| vector().of(Find::find_rel()), input, FindSpec::FindRel(vec![Element::Variable(variable(vx)), Element::Variable(variable(vy))])); @@ -617,37 +658,11 @@ mod test { fn test_find_tuple() { let vx = edn::PlainSymbol::new("?x"); let vy = edn::PlainSymbol::new("?y"); - let input = [edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), - edn::Value::PlainSymbol(vy.clone())])]; + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), + edn::Value::PlainSymbol(vy.clone())]); assert_parses_to!(Find::find_tuple, input, FindSpec::FindTuple(vec![Element::Variable(variable(vx)), Element::Variable(variable(vy))])); } - - #[test] - fn test_find_processing() { - let vx = edn::PlainSymbol::new("?x"); - let vy = edn::PlainSymbol::new("?y"); - let ellipsis = edn::PlainSymbol::new("..."); - let period = edn::PlainSymbol::new("."); - - let scalar = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(period.clone())]; - let tuple = [edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), - edn::Value::PlainSymbol(vy.clone())])]; - let coll = [edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), - edn::Value::PlainSymbol(ellipsis.clone())])]; - let rel = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())]; - - assert_eq!(FindSpec::FindScalar(Element::Variable(variable(vx.clone()))), - find_seq_to_find_spec(&scalar).unwrap()); - assert_eq!(FindSpec::FindTuple(vec![Element::Variable(variable(vx.clone())), - Element::Variable(variable(vy.clone()))]), - find_seq_to_find_spec(&tuple).unwrap()); - assert_eq!(FindSpec::FindColl(Element::Variable(variable(vx.clone()))), - find_seq_to_find_spec(&coll).unwrap()); - assert_eq!(FindSpec::FindRel(vec![Element::Variable(variable(vx.clone())), - Element::Variable(variable(vy.clone()))]), - find_seq_to_find_spec(&rel).unwrap()); - } } diff --git a/query-parser/src/util.rs b/query-parser/src/util.rs deleted file mode 100644 index e92a3cee..00000000 --- a/query-parser/src/util.rs +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2016 Mozilla -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use -// this file except in compliance with the License. You may obtain a copy of the -// License at http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -extern crate edn; - -use std::collections::BTreeMap; - -/// Take a slice of EDN values, as would be extracted from an -/// `edn::Value::Vector`, and turn it into a map. -/// -/// The slice must consist of subsequences of an initial plain -/// keyword, followed by one or more non-plain-keyword values. -/// -/// The plain keywords are used as keys into the resulting map. -/// The values are accumulated into vectors. -/// -/// Invalid input causes this function to return `None`. -/// -/// TODO: this function can be generalized to take an arbitrary -/// destructuring/break function, yielding a map with a custom -/// key type and splitting in the right places. -pub fn vec_to_keyword_map(vec: &[edn::Value]) -> Option>> { - let mut m = BTreeMap::new(); - - if vec.is_empty() { - return Some(m); - } - - if vec.len() == 1 { - return None; - } - - // Turn something like - // - // `[:foo 1 2 3 :bar 4 5 6]` - // - // into - // - // `Some((:foo, [1 2 3]))` - fn step(slice: &[edn::Value]) -> Option<(edn::Keyword, Vec)> { - // [:foo 1 2 3 :bar] is invalid: nothing follows `:bar`. - if slice.len() < 2 { - return None; - } - - // The first item must be a keyword. - if let edn::Value::Keyword(ref k) = slice[0] { - - // The second can't be: [:foo :bar 1 2 3] is invalid. - if slice[1].is_keyword() { - return None; - } - - // Accumulate items until we reach the next keyword. - let mut acc = Vec::new(); - for v in &slice[1..] { - if v.is_keyword() { - break; - } - acc.push(v.clone()); - } - return Some((k.clone(), acc)); - } - - None - } - - let mut bits = vec; - while !bits.is_empty() { - match step(bits) { - Some((k, v)) => { - bits = &bits[(v.len() + 1)..]; - - // Duplicate keys aren't allowed. - if m.contains_key(&k) { - return None; - } - m.insert(k, v); - }, - None => return None, - } - } - return Some(m); -} - -#[test] -fn test_vec_to_keyword_map() { - let foo = edn::symbols::Keyword("foo".to_string()); - let bar = edn::symbols::Keyword("bar".to_string()); - let baz = edn::symbols::Keyword("baz".to_string()); - - // [:foo 1 2 3 :bar 4] - let input = vec!(edn::Value::Keyword(foo.clone()), - edn::Value::Integer(1), - edn::Value::Integer(2), - edn::Value::Integer(3), - edn::Value::Keyword(bar.clone()), - edn::Value::Integer(4)); - - let m = vec_to_keyword_map(&input).unwrap(); - - assert!(m.contains_key(&foo)); - assert!(m.contains_key(&bar)); - assert!(!m.contains_key(&baz)); - - let onetwothree = vec!(edn::Value::Integer(1), - edn::Value::Integer(2), - edn::Value::Integer(3)); - let four = vec!(edn::Value::Integer(4)); - - assert_eq!(m.get(&foo).unwrap(), &onetwothree); - assert_eq!(m.get(&bar).unwrap(), &four); - - // Trailing keywords aren't allowed. - assert_eq!(None, - vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone())))); - assert_eq!(None, - vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone()), - edn::Value::Integer(2), - edn::Value::Keyword(bar.clone())))); - - // Duplicate keywords aren't allowed. - assert_eq!(None, - vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone()), - edn::Value::Integer(2), - edn::Value::Keyword(foo.clone()), - edn::Value::Integer(1)))); - - // Starting with anything but a keyword isn't allowed. - assert_eq!(None, - vec_to_keyword_map(&vec!(edn::Value::Integer(2), - edn::Value::Keyword(foo.clone()), - edn::Value::Integer(1)))); - - // Consecutive keywords aren't allowed. - assert_eq!(None, - vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone()), - edn::Value::Keyword(bar.clone()), - edn::Value::Integer(1)))); - - // Empty lists return an empty map. - assert_eq!(BTreeMap::new(), vec_to_keyword_map(&vec!()).unwrap()); -} - diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs index dba420ed..36a6064b 100644 --- a/query-parser/tests/find_tests.rs +++ b/query-parser/tests/find_tests.rs @@ -97,7 +97,7 @@ fn can_parse_simple_or() { #[test] fn can_parse_unit_or_join() { let s = "[:find ?x . :where (or-join [?x] [?x _ 15])]"; - let p = parse_find_string(s).unwrap(); + let p = parse_find_string(s).expect("to be able to parse find"); assert_eq!(p.find_spec, FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); diff --git a/query/src/lib.rs b/query/src/lib.rs index 23fcdf3c..b9037117 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -67,15 +67,15 @@ impl Variable { } pub trait FromValue { - fn from_value(v: &edn::Value) -> Option; + fn from_value(v: edn::ValueAndSpan) -> Option; } /// If the provided EDN value is a PlainSymbol beginning with '?', return /// it wrapped in a Variable. If not, return None. /// TODO: intern strings. #398. impl FromValue for Variable { - fn from_value(v: &edn::Value) -> Option { - if let edn::Value::PlainSymbol(ref s) = *v { + fn from_value(v: edn::ValueAndSpan) -> Option { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { Variable::from_symbol(s) } else { None @@ -112,8 +112,8 @@ impl fmt::Debug for Variable { pub struct PredicateFn(pub PlainSymbol); impl FromValue for PredicateFn { - fn from_value(v: &edn::Value) -> Option { - if let edn::Value::PlainSymbol(ref s) = *v { + fn from_value(v: edn::ValueAndSpan) -> Option { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { PredicateFn::from_symbol(s) } else { None @@ -135,8 +135,8 @@ pub enum SrcVar { } impl FromValue for SrcVar { - fn from_value(v: &edn::Value) -> Option { - if let edn::Value::PlainSymbol(ref s) = *v { + fn from_value(v: edn::ValueAndSpan) -> Option { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { SrcVar::from_symbol(s) } else { None @@ -184,16 +184,17 @@ pub enum FnArg { } impl FromValue for FnArg { - fn from_value(v: &edn::Value) -> Option { + fn from_value(v: edn::ValueAndSpan) -> Option { // TODO: support SrcVars. - Variable::from_value(v) + Variable::from_value(v.clone()) // TODO: don't clone! .and_then(|v| Some(FnArg::Variable(v))) - .or_else(|| - match v { - &edn::Value::Integer(i) => Some(FnArg::EntidOrInteger(i)), - &edn::Value::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))), + .or_else(|| { + println!("from_value {}", v.inner); + match v.inner { + edn::SpannedValue::Integer(i) => Some(FnArg::EntidOrInteger(i)), + edn::SpannedValue::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))), _ => unimplemented!(), - }) + }}) } } @@ -234,14 +235,14 @@ impl PatternNonValuePlace { } impl FromValue for PatternNonValuePlace { - fn from_value(v: &edn::Value) -> Option { - match v { - &edn::Value::Integer(x) => if x >= 0 { + fn from_value(v: edn::ValueAndSpan) -> Option { + match v.inner { + edn::SpannedValue::Integer(x) => if x >= 0 { Some(PatternNonValuePlace::Entid(x)) } else { None }, - &edn::Value::PlainSymbol(ref x) => if x.0.as_str() == "_" { + edn::SpannedValue::PlainSymbol(ref x) => if x.0.as_str() == "_" { Some(PatternNonValuePlace::Placeholder) } else { if let Some(v) = Variable::from_symbol(x) { @@ -250,7 +251,7 @@ impl FromValue for PatternNonValuePlace { None } }, - &edn::Value::NamespacedKeyword(ref x) => + edn::SpannedValue::NamespacedKeyword(ref x) => Some(PatternNonValuePlace::Ident(Rc::new(x.clone()))), _ => None, } @@ -276,23 +277,23 @@ pub enum PatternValuePlace { } impl FromValue for PatternValuePlace { - fn from_value(v: &edn::Value) -> Option { - match v { - &edn::Value::Integer(x) => + fn from_value(v: edn::ValueAndSpan) -> Option { + match v.inner { + edn::SpannedValue::Integer(x) => Some(PatternValuePlace::EntidOrInteger(x)), - &edn::Value::PlainSymbol(ref x) if x.0.as_str() == "_" => + edn::SpannedValue::PlainSymbol(ref x) if x.0.as_str() == "_" => Some(PatternValuePlace::Placeholder), - &edn::Value::PlainSymbol(ref x) => + edn::SpannedValue::PlainSymbol(ref x) => Variable::from_symbol(x).map(PatternValuePlace::Variable), - &edn::Value::NamespacedKeyword(ref x) => + edn::SpannedValue::NamespacedKeyword(ref x) => Some(PatternValuePlace::IdentOrKeyword(Rc::new(x.clone()))), - &edn::Value::Boolean(x) => + edn::SpannedValue::Boolean(x) => Some(PatternValuePlace::Constant(NonIntegerConstant::Boolean(x))), - &edn::Value::Float(x) => + edn::SpannedValue::Float(x) => Some(PatternValuePlace::Constant(NonIntegerConstant::Float(x))), - &edn::Value::BigInteger(ref x) => + edn::SpannedValue::BigInteger(ref x) => Some(PatternValuePlace::Constant(NonIntegerConstant::BigInteger(x.clone()))), - &edn::Value::Text(ref x) => + edn::SpannedValue::Text(ref x) => // TODO: intern strings. #398. Some(PatternValuePlace::Constant(NonIntegerConstant::Text(Rc::new(x.clone())))), _ => None, @@ -685,4 +686,4 @@ impl ContainsVariables for Pattern { acc_ref(acc, v) } } -} \ No newline at end of file +} From 720fbf3d01ab5ee007fe1e4606572caa497124a9 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 30 Mar 2017 09:52:26 -0700 Subject: [PATCH 08/18] Part 4: Use `value_and_span` apparatus in root crate. --- src/conn.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/conn.rs b/src/conn.rs index d82a044a..a4037df1 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -129,9 +129,8 @@ impl Conn { sqlite: &mut rusqlite::Connection, transaction: &str) -> Result { - let assertion_vector = edn::parse::value(transaction) - .map(|x| x.without_spans())?; - let entities = mentat_tx_parser::Tx::parse(&[assertion_vector][..])?; + let assertion_vector = edn::parse::value(transaction)?; + let entities = mentat_tx_parser::Tx::parse(assertion_vector)?; let tx = sqlite.transaction()?; @@ -178,7 +177,6 @@ mod tests { use super::*; extern crate mentat_parser_utils; - use self::mentat_parser_utils::ValueParseError; #[test] fn test_transact_errors() { @@ -203,7 +201,7 @@ mod tests { // Bad transaction data: missing leading :db/add. let report = conn.transact(&mut sqlite, "[[\"t\" :db/ident :b/keyword]]"); match report.unwrap_err() { - Error(ErrorKind::TxParseError(::mentat_tx_parser::errors::ErrorKind::ParseError(ValueParseError { .. })), _) => { }, + Error(ErrorKind::TxParseError(::mentat_tx_parser::errors::ErrorKind::ParseError(_)), _) => { }, x => panic!("expected EDN parse error, got {:?}", x), } From 7135eeac49bc3684919dfa450705c343da913d0a Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 11:23:28 -0700 Subject: [PATCH 09/18] Review comment: Make Span and SpanPosition Copy. --- edn/src/edn.rustpeg | 36 ++++++++++++++++++------------------ edn/src/types.rs | 10 ++++++++-- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/edn/src/edn.rustpeg b/edn/src/edn.rustpeg index 87076cc8..b7328ba5 100644 --- a/edn/src/edn.rustpeg +++ b/edn/src/edn.rustpeg @@ -32,7 +32,7 @@ pub nil -> ValueAndSpan = start:#position "nil" end:#position { ValueAndSpan { inner: SpannedValue::Nil, - span: Span(start, end) + span: Span::new(start, end) } } @@ -40,7 +40,7 @@ pub nan -> ValueAndSpan = start:#position "#f" whitespace+ "NaN" end:#position { ValueAndSpan { inner: SpannedValue::Float(OrderedFloat(NAN)), - span: Span(start, end) + span: Span::new(start, end) } } @@ -48,7 +48,7 @@ pub infinity -> ValueAndSpan = start:#position "#f" whitespace+ s:$(sign) "Infinity" end:#position { ValueAndSpan { inner: SpannedValue::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY })), - span: Span(start, end) + span: Span::new(start, end) } } @@ -56,13 +56,13 @@ pub boolean -> ValueAndSpan = start:#position "true" end:#position { ValueAndSpan { inner: SpannedValue::Boolean(true), - span: Span(start, end) + span: Span::new(start, end) } } / start:#position "false" end:#position { ValueAndSpan { inner: SpannedValue::Boolean(false), - span: Span(start, end) + span: Span::new(start, end) } } @@ -77,7 +77,7 @@ pub bigint -> ValueAndSpan = start:#position b:$( sign? digit+ ) "N" end:#position { ValueAndSpan { inner: SpannedValue::BigInteger(b.parse::().unwrap()), - span: Span(start, end) + span: Span::new(start, end) } } @@ -85,7 +85,7 @@ pub octalinteger -> ValueAndSpan = start:#position "0" i:$( octaldigit+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i64::from_str_radix(i, 8).unwrap()), - span: Span(start, end) + span: Span::new(start, end) } } @@ -93,7 +93,7 @@ pub hexinteger -> ValueAndSpan = start:#position "0x" i:$( hex+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i64::from_str_radix(i, 16).unwrap()), - span: Span(start, end) + span: Span::new(start, end) } } @@ -102,7 +102,7 @@ pub basedinteger -> ValueAndSpan = start:#position b:$( validbase ) "r" i:$( alphanumeric+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i64::from_str_radix(i, b.parse::().unwrap()).unwrap()), - span: Span(start, end) + span: Span::new(start, end) } } @@ -110,7 +110,7 @@ pub integer -> ValueAndSpan = start:#position i:$( sign? digit+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i.parse::().unwrap()), - span: Span(start, end) + span: Span::new(start, end) } } @@ -124,7 +124,7 @@ pub float -> ValueAndSpan = start:#position f:$( frac_exp / exp / frac ) end:#position { ValueAndSpan { inner: SpannedValue::Float(OrderedFloat(f.parse::().unwrap())), - span: Span(start, end) + span: Span::new(start, end) } } @@ -138,7 +138,7 @@ pub text -> ValueAndSpan = start:#position "\"" t:$( char* ) "\"" end:#position { ValueAndSpan { inner: SpannedValue::Text(t.to_string()), - span: Span(start, end) + span: Span::new(start, end) } } @@ -164,7 +164,7 @@ pub symbol -> ValueAndSpan = end:#position { ValueAndSpan { inner: SpannedValue::from_symbol(ns, n), - span: Span(start, end) + span: Span::new(start, end) } } @@ -176,7 +176,7 @@ pub keyword -> ValueAndSpan = end:#position { ValueAndSpan { inner: SpannedValue::from_keyword(ns, n), - span: Span(start, end) + span: Span::new(start, end) } } @@ -184,7 +184,7 @@ pub list -> ValueAndSpan = start:#position "(" __ v:(value)* __ ")" end:#position { ValueAndSpan { inner: SpannedValue::List(LinkedList::from_iter(v)), - span: Span(start, end) + span: Span::new(start, end) } } @@ -192,7 +192,7 @@ pub vector -> ValueAndSpan = start:#position "[" __ v:(value)* __ "]" end:#position { ValueAndSpan { inner: SpannedValue::Vector(v), - span: Span(start, end) + span: Span::new(start, end) } } @@ -200,7 +200,7 @@ pub set -> ValueAndSpan = start:#position "#{" __ v:(value)* __ "}" end:#position { ValueAndSpan { inner: SpannedValue::Set(BTreeSet::from_iter(v)), - span: Span(start, end) + span: Span::new(start, end) } } @@ -213,7 +213,7 @@ pub map -> ValueAndSpan = start:#position "{" __ v:(pair)* __ "}" end:#position { ValueAndSpan { inner: SpannedValue::Map(BTreeMap::from_iter(v)), - span: Span(start, end) + span: Span::new(start, end) } } diff --git a/edn/src/types.rs b/edn/src/types.rs index efd2eb4c..c511c480 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -66,8 +66,14 @@ pub enum SpannedValue { } /// Span represents the current offset (start, end) into the input string. -#[derive(PartialEq, Eq, Hash, Clone, Debug)] -pub struct Span(pub usize, pub usize); +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct Span(pub u32, pub u32); + +impl Span { + pub fn new(start: usize, end: usize) -> Span { + Span(start as u32, end as u32) + } +} /// A wrapper type around `SpannedValue` and `Span`, representing some EDN value /// and the parsing offset (start, end) in the original EDN string. From f8e75d817e8bc9aa15ed984d94da90e273fede87 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 11:23:50 -0700 Subject: [PATCH 10/18] Review comment: nits. --- parser-utils/src/value_and_span.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/parser-utils/src/value_and_span.rs b/parser-utils/src/value_and_span.rs index e0715f5d..4863130e 100644 --- a/parser-utils/src/value_and_span.rs +++ b/parser-utils/src/value_and_span.rs @@ -9,7 +9,11 @@ // specific language governing permissions and limitations under the License. use std; -use std::fmt::{Display, Formatter}; +use std::fmt::{ + Debug, + Display, + Formatter, +}; use std::cmp::Ordering; use combine::{ @@ -25,7 +29,7 @@ use combine::{ satisfy, satisfy_map, }; -use combine::primitives; // To not shadow Error +use combine::primitives; // To not shadow Error. use combine::primitives::{ Consumed, FastResult, @@ -40,12 +44,12 @@ use combine::combinator::{ use edn; /// A wrapper to let us order `edn::Span` in whatever way is appropriate for parsing with `combine`. -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug)] pub struct SpanPosition(edn::Span); impl Display for SpanPosition { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { - write!(f, "{:?}", self.0) + self.0.fmt(f) } } @@ -292,9 +296,9 @@ pub fn value(value: edn::Value) -> Box ParseResult +fn keyword_map_(input: Stream) -> ParseResult { - // One run is a keyword followed by at one or more non-keywords. + // One run is a keyword followed by one or more non-keywords. let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()), many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword())) .map(|vs: Vec| { From 0165a842ef3f3c82b03b08a17ef02f1c163d890b Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 11:48:01 -0700 Subject: [PATCH 11/18] Review comment: Make `or` be `or_exactly`. I baked the eof checking directly into the parser, rather than using the skip and eof parsers. I also took the time to restore some tests that were mistakenly commented out. --- parser-utils/src/value_and_span.rs | 41 ++++--- query-parser/src/parse.rs | 34 +++--- tx-parser/src/lib.rs | 179 ++++++++++++++--------------- 3 files changed, 126 insertions(+), 128 deletions(-) diff --git a/parser-utils/src/value_and_span.rs b/parser-utils/src/value_and_span.rs index 4863130e..1fa49e56 100644 --- a/parser-utils/src/value_and_span.rs +++ b/parser-utils/src/value_and_span.rs @@ -22,7 +22,6 @@ use combine::{ Parser, ParseResult, StreamOnce, - eof, many, many1, parser, @@ -35,10 +34,8 @@ use combine::primitives::{ FastResult, }; use combine::combinator::{ - Eof, Expected, FnParser, - Skip, }; use edn; @@ -165,9 +162,9 @@ impl Item for edn::ValueAndSpan { } } -/// `Of` and `of` allow us to express nested parsers naturally. +/// `OfExactly` and `of_exactly` allow us to express nested parsers naturally. /// -/// For example, `vector().of(many(list()))` parses a vector-of-lists, like [(1 2) (:a :b) ("test") ()]. +/// For example, `vector().of_exactly(many(list()))` parses a vector-of-lists, like [(1 2) (:a :b) ("test") ()]. /// /// The "outer" parser `P` and the "nested" parser `N` must be compatible: `P` must produce an /// output `edn::ValueAndSpan` which can itself be turned into a stream of child elements; and `N` @@ -175,9 +172,9 @@ impl Item for edn::ValueAndSpan { /// nested parser to the outer parser, which is part of what has made parsing `&'a [edn::Value]` /// difficult. #[derive(Clone)] -pub struct Of(P, N); +pub struct OfExactly(P, N); -impl Parser for Of +impl Parser for OfExactly where P: Parser, N: Parser, { @@ -190,7 +187,12 @@ impl Parser for Of match self.0.parse_lazy(input) { ConsumedOk((outer_value, outer_input)) => { match self.1.parse_lazy(outer_value.into_child_stream()) { - ConsumedOk((inner_value, _)) | EmptyOk((inner_value, _)) => ConsumedOk((inner_value, outer_input)), + ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => { + match inner_input.uncons() { + Err(ref err) if *err == primitives::Error::end_of_input() => ConsumedOk((inner_value, outer_input)), + _ => EmptyErr(ParseError::empty(inner_input.position())), + } + }, // TODO: Improve the error output to reference the nested value (or span) in // some way. This seems surprisingly difficult to do, so we just surface the // inner error message right now. See also the comment below. @@ -199,7 +201,12 @@ impl Parser for Of }, EmptyOk((outer_value, outer_input)) => { match self.1.parse_lazy(outer_value.into_child_stream()) { - ConsumedOk((inner_value, _)) | EmptyOk((inner_value, _)) => EmptyOk((inner_value, outer_input)), + ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => { + match inner_input.uncons() { + Err(ref err) if *err == primitives::Error::end_of_input() => EmptyOk((inner_value, outer_input)), + _ => EmptyErr(ParseError::empty(inner_input.position())), + } + }, // TODO: Improve the error output. See the comment above. EmptyErr(e) | ConsumedErr(e) => EmptyErr(e), } @@ -215,27 +222,27 @@ impl Parser for Of } #[inline(always)] -pub fn of(p: P, n: N) -> Of>> +pub fn of_exactly(p: P, n: N) -> OfExactly where P: Parser, N: Parser, { - Of(p, n.skip(eof())) + OfExactly(p, n) } /// We need a trait to define `Parser.of` and have it live outside of the `combine` crate. -pub trait OfParsing: Parser + Sized { - fn of(self, n: N) -> Of>> +pub trait OfExactlyParsing: Parser + Sized { + fn of_exactly(self, n: N) -> OfExactly where Self: Sized, N: Parser; } -impl

OfParsing for P +impl

OfExactlyParsing for P where P: Parser { - fn of(self, n: N) -> Of>> + fn of_exactly(self, n: N) -> OfExactly where N: Parser { - of(self, n) + of_exactly(self, n) } } @@ -311,7 +318,7 @@ fn keyword_map_(input: Stream) -> ParseResult } })); - let mut runs = vector().of(many::, _>(run)); + let mut runs = vector().of_exactly(many::, _>(run)); let (data, input) = try!(runs.parse_lazy(input).into()); diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 1ba01b98..aedd7624 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -26,7 +26,7 @@ use self::mentat_parser_utils::{ use self::mentat_parser_utils::value_and_span::Stream as ValueStream; use self::mentat_parser_utils::value_and_span::{ Item, - OfParsing, + OfExactlyParsing, keyword_map, list, map, @@ -162,7 +162,7 @@ def_parser!(Where, or_join, edn::ValueAndSpan, { def_parser!(Where, rule_vars, Vec, { seq() - .of(many1(Query::variable())) + .of_exactly(many1(Query::variable())) }); def_parser!(Where, or_pattern_clause, OrWhereClause, { @@ -171,7 +171,7 @@ def_parser!(Where, or_pattern_clause, OrWhereClause, { def_parser!(Where, or_and_clause, OrWhereClause, { seq() - .of(Where::and() + .of_exactly(Where::and() .with(many1(Where::clause())) .map(OrWhereClause::And)) }); @@ -182,7 +182,7 @@ def_parser!(Where, or_where_clause, OrWhereClause, { def_parser!(Where, or_clause, WhereClause, { seq() - .of(Where::or() + .of_exactly(Where::or() .with(many1(Where::or_where_clause())) .map(|clauses| { WhereClause::OrJoin( @@ -195,7 +195,7 @@ def_parser!(Where, or_clause, WhereClause, { def_parser!(Where, or_join_clause, WhereClause, { seq() - .of(Where::or_join() + .of_exactly(Where::or_join() .with(Where::rule_vars()) .and(many1(Where::or_where_clause())) .map(|(vars, clauses)| { @@ -212,8 +212,8 @@ def_parser!(Where, pred, WhereClause, { // Accept either a nested list or a nested vector here: // `[(foo ?x ?y)]` or `[[foo ?x ?y]]` vector() - .of(seq() - .of((Query::predicate_fn(), Query::arguments()) + .of_exactly(seq() + .of_exactly((Query::predicate_fn(), Query::arguments()) .map(|(f, args)| { WhereClause::Pred( Predicate { @@ -225,7 +225,7 @@ def_parser!(Where, pred, WhereClause, { def_parser!(Where, pattern, WhereClause, { vector() - .of( + .of_exactly( // While *technically* Datomic allows you to have a query like: // [:find … :where [[?x]]] // We don't -- we require at least e, a. @@ -313,7 +313,7 @@ def_parser!(Find, find_scalar, FindSpec, { def_parser!(Find, find_coll, FindSpec, { vector() - .of(Query::variable() + .of_exactly(Query::variable() .map(|var| FindSpec::FindColl(Element::Variable(var))) .skip(Find::ellipsis())) }); @@ -328,7 +328,7 @@ def_parser!(Find, find_rel, FindSpec, { def_parser!(Find, find_tuple, FindSpec, { vector() - .of(Find::elements().map(FindSpec::FindTuple)) + .of_exactly(Find::elements().map(FindSpec::FindTuple)) }); /// Parse a stream values into one of four find specs. @@ -395,16 +395,16 @@ enum FindQueryPart { /// construct a `FindQuery` from them. def_parser!(Find, query, FindQuery, { let p_find_spec = Find::literal_find() - .with(vector().of(Find::spec().map(FindQueryPart::FindSpec))); + .with(vector().of_exactly(Find::spec().map(FindQueryPart::FindSpec))); let p_with_vars = Find::literal_with() - .with(vector().of(many(Query::variable()).map(FindQueryPart::With))); + .with(vector().of_exactly(many(Query::variable()).map(FindQueryPart::With))); let p_where_clauses = Find::literal_where() - .with(vector().of(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses"); + .with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses"); (or(map(), keyword_map())) - .of(many(choice::<[&mut Parser; 3], _>([ + .of_exactly(many(choice::<[&mut Parser; 3], _>([ &mut try(p_find_spec), &mut try(p_with_vars), &mut try(p_where_clauses), @@ -619,7 +619,7 @@ mod test { fn test_find_sp_variable() { let sym = edn::PlainSymbol::new("?x"); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone())]); - assert_parses_to!(|| vector().of(Query::variable()), input, variable(sym)); + assert_parses_to!(|| vector().of_exactly(Query::variable()), input, variable(sym)); } #[test] @@ -627,7 +627,7 @@ mod test { let sym = edn::PlainSymbol::new("?x"); let period = edn::PlainSymbol::new("."); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone()), edn::Value::PlainSymbol(period.clone())]); - assert_parses_to!(|| vector().of(Find::find_scalar()), + assert_parses_to!(|| vector().of_exactly(Find::find_scalar()), input, FindSpec::FindScalar(Element::Variable(variable(sym)))); } @@ -648,7 +648,7 @@ mod test { let vx = edn::PlainSymbol::new("?x"); let vy = edn::PlainSymbol::new("?y"); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())]); - assert_parses_to!(|| vector().of(Find::find_rel()), + assert_parses_to!(|| vector().of_exactly(Find::find_rel()), input, FindSpec::FindRel(vec![Element::Variable(variable(vx)), Element::Variable(variable(vy))])); diff --git a/tx-parser/src/lib.rs b/tx-parser/src/lib.rs index 35f148e7..e3c657c4 100644 --- a/tx-parser/src/lib.rs +++ b/tx-parser/src/lib.rs @@ -45,7 +45,7 @@ use mentat_tx::entities::{ use mentat_parser_utils::{ResultParser}; use mentat_parser_utils::value_and_span::{ Item, - OfParsing, + OfExactlyParsing, integer, list, map, @@ -66,8 +66,8 @@ def_parser!(Tx, entid, Entid, { }); def_parser!(Tx, lookup_ref, LookupRef, { - list() - .of(value(edn::Value::PlainSymbol(PlainSymbol::new("lookup-ref"))) + list().of_exactly( + value(edn::Value::PlainSymbol(PlainSymbol::new("lookup-ref"))) .with((Tx::entid(), Tx::atom())) .map(|(a, v)| LookupRef { a: a, v: v.without_spans() })) @@ -88,7 +88,7 @@ def_parser!(Tx, atom, edn::ValueAndSpan, { }); def_parser!(Tx, nested_vector, Vec, { - vector().of(many(Tx::atom_or_lookup_ref_or_vector())) + vector().of_exactly(many(Tx::atom_or_lookup_ref_or_vector())) }); def_parser!(Tx, atom_or_lookup_ref_or_vector, AtomOrLookupRefOrVectorOrMapNotation, { @@ -116,12 +116,12 @@ def_parser!(Tx, add_or_retract, Entity, { } }); - vector().of(p) + vector().of_exactly(p) }); def_parser!(Tx, map_notation, MapNotation, { map() - .of(many((Tx::entid(), Tx::atom_or_lookup_ref_or_vector()))) + .of_exactly(many((Tx::entid(), Tx::atom_or_lookup_ref_or_vector()))) .map(|avs: Vec<(Entid, AtomOrLookupRefOrVectorOrMapNotation)>| -> MapNotation { avs.into_iter().collect() }) @@ -133,7 +133,7 @@ def_parser!(Tx, entity, Entity, { }); def_parser!(Tx, entities, Vec, { - vector().of(many(Tx::entity())) + vector().of_exactly(many(Tx::entity())) }); impl Tx { @@ -185,7 +185,9 @@ pub fn remove_db_id(map: &mut MapNotation) -> std::result::Result Value { Value::NamespacedKeyword(NamespacedKeyword::new(namespace, name)) @@ -213,9 +213,9 @@ mod tests { let input = Value::Vector(vec![kw("db", "add"), kw("test", "entid"), kw("test", "a"), - Value::Text("v".into())]).with_spans(); + Value::Text("v".into())]); let mut parser = Tx::entity(); - let result = parser.parse(input.into_atom_stream()).map(|x| x.0); + let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); assert_eq!(result, Ok(Entity::AddOrRetract { op: OpType::Add, @@ -226,91 +226,82 @@ mod tests { })); } - // #[test] - // fn test_retract() { - // let input = [Value::Vector(vec![kw("db", "retract"), - // Value::Integer(101), - // kw("test", "a"), - // Value::Text("v".into())])]; - // let mut parser = Tx::entity(); - // let result = parser.parse(&input[..]); - // assert_eq!(result, - // Ok((Entity::AddOrRetract { - // op: OpType::Retract, - // e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(101)), - // a: Entid::Ident(NamespacedKeyword::new("test", "a")), - // v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), - // }, - // &[][..]))); - // } + #[test] + fn test_retract() { + let input = Value::Vector(vec![kw("db", "retract"), + Value::Integer(101), + kw("test", "a"), + Value::Text("v".into())]); + let mut parser = Tx::entity(); + let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, + Ok(Entity::AddOrRetract { + op: OpType::Retract, + e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(101)), + a: Entid::Ident(NamespacedKeyword::new("test", "a")), + v: AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::Text("v".into()), Span(25, 28))), + })); + } - // #[test] - // fn test_lookup_ref() { - // let mut list = LinkedList::new(); - // list.push_back(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))); - // list.push_back(kw("test", "a1")); - // list.push_back(Value::Text("v1".into())); + #[test] + fn test_lookup_ref() { + let input = Value::Vector(vec![kw("db", "add"), + Value::List(vec![Value::PlainSymbol(PlainSymbol::new("lookup-ref")), + kw("test", "a1"), + Value::Text("v1".into())].into_iter().collect()), + kw("test", "a"), + Value::Text("v".into())]); + let mut parser = Tx::entity(); + let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, + Ok(Entity::AddOrRetract { + op: OpType::Add, + e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { + a: Entid::Ident(NamespacedKeyword::new("test", "a1")), + v: Value::Text("v1".into()), + }), + a: Entid::Ident(NamespacedKeyword::new("test", "a")), + v: AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::Text("v".into()), Span(44, 47))), + })); + } - // let input = [Value::Vector(vec![kw("db", "add"), - // Value::List(list), - // kw("test", "a"), - // Value::Text("v".into())])]; - // let mut parser = Tx::entity(); - // let result = parser.parse(&input[..]); - // assert_eq!(result, - // Ok((Entity::AddOrRetract { - // op: OpType::Add, - // e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { - // a: Entid::Ident(NamespacedKeyword::new("test", "a1")), - // v: Value::Text("v1".into()), - // }), - // a: Entid::Ident(NamespacedKeyword::new("test", "a")), - // v: AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v".into())), - // }, - // &[][..]))); - // } + #[test] + fn test_nested_vector() { + let input = Value::Vector(vec![kw("db", "add"), + Value::List(vec![Value::PlainSymbol(PlainSymbol::new("lookup-ref")), + kw("test", "a1"), + Value::Text("v1".into())].into_iter().collect()), + kw("test", "a"), + Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])]); + let mut parser = Tx::entity(); + let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, + Ok(Entity::AddOrRetract { + op: OpType::Add, + e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { + a: Entid::Ident(NamespacedKeyword::new("test", "a1")), + v: Value::Text("v1".into()), + }), + a: Entid::Ident(NamespacedKeyword::new("test", "a")), + v: AtomOrLookupRefOrVectorOrMapNotation::Vector(vec![AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::Text("v1".into()), Span(45, 49))), + AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::Text("v2".into()), Span(50, 54)))]), + })); + } - // #[test] - // fn test_nested_vector() { - // let mut list = LinkedList::new(); - // list.push_back(Value::PlainSymbol(PlainSymbol::new("lookup-ref"))); - // list.push_back(kw("test", "a1")); - // list.push_back(Value::Text("v1".into())); + #[test] + fn test_map_notation() { + let mut expected: MapNotation = BTreeMap::default(); + expected.insert(Entid::Ident(NamespacedKeyword::new("db", "id")), AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::Text("t".to_string()), Span(8, 11)))); + expected.insert(Entid::Ident(NamespacedKeyword::new("db", "ident")), AtomOrLookupRefOrVectorOrMapNotation::Atom(ValueAndSpan::new(SpannedValue::NamespacedKeyword(NamespacedKeyword::new("test", "attribute")), Span(22, 37)))); - // let input = [Value::Vector(vec![kw("db", "add"), - // Value::List(list), - // kw("test", "a"), - // Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])])]; - // let mut parser = Tx::entity(); - // let result = parser.parse(&input[..]); - // assert_eq!(result, - // Ok((Entity::AddOrRetract { - // op: OpType::Add, - // e: EntidOrLookupRefOrTempId::LookupRef(LookupRef { - // a: Entid::Ident(NamespacedKeyword::new("test", "a1")), - // v: Value::Text("v1".into()), - // }), - // a: Entid::Ident(NamespacedKeyword::new("test", "a")), - // v: AtomOrLookupRefOrVectorOrMapNotation::Vector(vec![AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v1".into())), - // AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("v2".into()))]), - // }, - // &[][..]))); - // } + let mut map: BTreeMap = BTreeMap::default(); + map.insert(kw("db", "id"), Value::Text("t".to_string())); + map.insert(kw("db", "ident"), kw("test", "attribute")); + let input = Value::Map(map.clone()); - // #[test] - // fn test_map_notation() { - // let mut expected: MapNotation = BTreeMap::default(); - // expected.insert(Entid::Ident(NamespacedKeyword::new("db", "id")), AtomOrLookupRefOrVectorOrMapNotation::Atom(Value::Text("t".to_string()))); - // expected.insert(Entid::Ident(NamespacedKeyword::new("db", "ident")), AtomOrLookupRefOrVectorOrMapNotation::Atom(kw("test", "attribute"))); - - // let mut map: BTreeMap = BTreeMap::default(); - // map.insert(kw("db", "id"), Value::Text("t".to_string())); - // map.insert(kw("db", "ident"), kw("test", "attribute")); - // let input = [Value::Map(map.clone())]; - // let mut parser = Tx::entity(); - // let result = parser.parse(&input[..]); - // assert_eq!(result, - // Ok((Entity::MapNotation(expected), - // &[][..]))); - // } + let mut parser = Tx::entity(); + let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, + Ok(Entity::MapNotation(expected))); + } } From f7fb22ae7e9b47e39f3d536ed1ccf7e0213e1ac4 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 13:01:05 -0700 Subject: [PATCH 12/18] Review comment: Extract and use def_matches_* macros. --- parser-utils/src/value_and_span.rs | 53 ++++++++++++++++++++++++++++++ query-parser/src/parse.rs | 52 +++-------------------------- tx-parser/src/lib.rs | 30 ++++++++--------- 3 files changed, 72 insertions(+), 63 deletions(-) diff --git a/parser-utils/src/value_and_span.rs b/parser-utils/src/value_and_span.rs index 1fa49e56..13d9e7e5 100644 --- a/parser-utils/src/value_and_span.rs +++ b/parser-utils/src/value_and_span.rs @@ -352,6 +352,59 @@ pub fn keyword_map() -> Expected ParseResult ParseResult).expected("keyword map") } +/// Generate a `satisfy` expression that matches a `PlainSymbol` value with the given name. +/// +/// We do this rather than using `combine::token` so that we don't need to allocate a new `String` +/// inside a `PlainSymbol` inside a `SpannedValue` inside a `ValueAndSpan` just to match input. +#[macro_export] +macro_rules! def_matches_plain_symbol { + ( $parser: ident, $name: ident, $input: expr ) => { + def_parser!($parser, $name, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + match v.inner { + edn::SpannedValue::PlainSymbol(ref s) => s.0.as_str() == $input, + _ => false, + } + }) + }); + } +} + +/// Generate a `satisfy` expression that matches a `Keyword` value with the given name. +/// +/// We do this rather than using `combine::token` to save allocations. +#[macro_export] +macro_rules! def_matches_keyword { + ( $parser: ident, $name: ident, $input: expr ) => { + def_parser!($parser, $name, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + match v.inner { + edn::SpannedValue::Keyword(ref s) => s.0.as_str() == $input, + _ => false, + } + }) + }); + } +} + +/// Generate a `satisfy` expression that matches a `NamespacedKeyword` value with the given +/// namespace and name. +/// +/// We do this rather than using `combine::token` to save allocations. +#[macro_export] +macro_rules! def_matches_namespaced_keyword { + ( $parser: ident, $name: ident, $input_namespace: expr, $input_name: expr ) => { + def_parser!($parser, $name, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + match v.inner { + edn::SpannedValue::NamespacedKeyword(ref s) => s.namespace.as_str() == $input_namespace && s.name.as_str() == $input_name, + _ => false, + } + }) + }); + } +} + #[cfg(test)] mod tests { use combine::{eof}; diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index aedd7624..2dffefa2 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -283,26 +283,9 @@ def_parser!(Where, clauses, Vec, { pub struct Find; -/// TODO: extract macro for matching these `PlainSymbol` instances. -def_parser!(Find, period, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { - if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { - s.0.as_str() == "." - } else { - false - } - }) -}); +def_matches_plain_symbol!(Find, period, "."); -def_parser!(Find, ellipsis, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { - if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { - s.0.as_str() == "..." - } else { - false - } - }) -}); +def_matches_plain_symbol!(Find, ellipsis, "..."); def_parser!(Find, find_scalar, FindSpec, { Query::variable() @@ -352,36 +335,11 @@ def_parser!(Find, spec, FindSpec, { &mut try(Find::find_rel())]) }); -/// TODO: extract macro for matching these `Keyword` instances. -def_parser!(Find, literal_find, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { - if let edn::SpannedValue::Keyword(ref s) = v.inner { - s.0.as_str() == "find" - } else { - false - } - }) -}); +def_matches_keyword!(Find, literal_find, "find"); -def_parser!(Find, literal_with, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { - if let edn::SpannedValue::Keyword(ref s) = v.inner { - s.0.as_str() == "with" - } else { - false - } - }) -}); +def_matches_keyword!(Find, literal_with, "with"); -def_parser!(Find, literal_where, edn::ValueAndSpan, { - satisfy(|v: edn::ValueAndSpan| { - if let edn::SpannedValue::Keyword(ref s) = v.inner { - s.0.as_str() == "where" - } else { - false - } - }) -}); +def_matches_keyword!(Find, literal_where, "where"); /// Express something close to a builder pattern for a `FindQuery`. enum FindQueryPart { diff --git a/tx-parser/src/lib.rs b/tx-parser/src/lib.rs index e3c657c4..ca162b60 100644 --- a/tx-parser/src/lib.rs +++ b/tx-parser/src/lib.rs @@ -24,14 +24,11 @@ use combine::{ eof, many, parser, + satisfy, satisfy_map, Parser, ParseResult, }; -use edn::symbols::{ - NamespacedKeyword, - PlainSymbol, -}; use mentat_tx::entities::{ AtomOrLookupRefOrVectorOrMapNotation, Entid, @@ -50,7 +47,6 @@ use mentat_parser_utils::value_and_span::{ list, map, namespaced_keyword, - value, vector, }; @@ -65,9 +61,11 @@ def_parser!(Tx, entid, Entid, { .or(namespaced_keyword().map(|x| Entid::Ident(x))) }); +def_matches_plain_symbol!(Tx, literal_lookup_ref, "lookup-ref"); + def_parser!(Tx, lookup_ref, LookupRef, { list().of_exactly( - value(edn::Value::PlainSymbol(PlainSymbol::new("lookup-ref"))) + Tx::literal_lookup_ref() .with((Tx::entid(), Tx::atom())) .map(|(a, v)| LookupRef { a: a, v: v.without_spans() })) @@ -84,7 +82,7 @@ def_parser!(Tx, temp_id, TempId, { }); def_parser!(Tx, atom, edn::ValueAndSpan, { - satisfy_map(|x: edn::ValueAndSpan| x.into_atom().map(|v| v)) + satisfy_map(|x: edn::ValueAndSpan| x.into_atom()) }); def_parser!(Tx, nested_vector, Vec, { @@ -98,12 +96,13 @@ def_parser!(Tx, atom_or_lookup_ref_or_vector, AtomOrLookupRefOrVectorOrMapNotati .or(Tx::atom().map(AtomOrLookupRefOrVectorOrMapNotation::Atom)) }); +def_matches_namespaced_keyword!(Tx, literal_db_add, "db", "add"); + +def_matches_namespaced_keyword!(Tx, literal_db_retract, "db", "retract"); + def_parser!(Tx, add_or_retract, Entity, { - let add = value(edn::Value::NamespacedKeyword(NamespacedKeyword::new("db", "add"))) - .map(|_| OpType::Add); - let retract = value(edn::Value::NamespacedKeyword(NamespacedKeyword::new("db", "retract"))) - .map(|_| OpType::Retract); - let p = (add.or(retract), + vector().of_exactly( + (Tx::literal_db_add().map(|_| OpType::Add).or(Tx::literal_db_retract().map(|_| OpType::Retract)), Tx::entid_or_lookup_ref_or_temp_id(), Tx::entid(), Tx::atom_or_lookup_ref_or_vector()) @@ -114,9 +113,7 @@ def_parser!(Tx, add_or_retract, Entity, { a: a, v: v, } - }); - - vector().of_exactly(p) + })) }); def_parser!(Tx, map_notation, MapNotation, { @@ -189,8 +186,9 @@ mod tests { use std::collections::BTreeMap; use combine::Parser; - use edn::symbols::NamespacedKeyword; use edn::{ + NamespacedKeyword, + PlainSymbol, Span, SpannedValue, Value, From 678a116130d6e27cdae1733ef75640f45f16ae46 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 13:11:41 -0700 Subject: [PATCH 13/18] Review comment: .map() as late as possible. --- query-parser/src/parse.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 2dffefa2..f02d141a 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -289,16 +289,16 @@ def_matches_plain_symbol!(Find, ellipsis, "..."); def_parser!(Find, find_scalar, FindSpec, { Query::variable() - .map(|var| FindSpec::FindScalar(Element::Variable(var))) .skip(Find::period()) .skip(eof()) + .map(|var| FindSpec::FindScalar(Element::Variable(var))) }); def_parser!(Find, find_coll, FindSpec, { vector() .of_exactly(Query::variable() - .map(|var| FindSpec::FindColl(Element::Variable(var))) .skip(Find::ellipsis())) + .map(|var| FindSpec::FindColl(Element::Variable(var))) }); def_parser!(Find, elements, Vec, { @@ -314,7 +314,7 @@ def_parser!(Find, find_tuple, FindSpec, { .of_exactly(Find::elements().map(FindSpec::FindTuple)) }); -/// Parse a stream values into one of four find specs. +/// Parse a stream of values into one of four find specs. /// /// `:find` must be an array of plain var symbols (?foo), pull expressions, and aggregates. For now /// we only support variables and the annotations necessary to declare which flavor of :find we From c203046c16597d3759314080ceafcbf0c4b61c18 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 16:46:11 -0700 Subject: [PATCH 14/18] Part 1: Parse functions in where clauses. --- parser-utils/src/lib.rs | 16 +++- query-parser/src/parse.rs | 152 ++++++++++++++++++++++++++++++++++++++ query/src/lib.rs | 30 +++++++- 3 files changed, 194 insertions(+), 4 deletions(-) diff --git a/parser-utils/src/lib.rs b/parser-utils/src/lib.rs index 5e8cf923..c41f820b 100644 --- a/parser-utils/src/lib.rs +++ b/parser-utils/src/lib.rs @@ -38,8 +38,20 @@ pub type ResultParser = Expected ParseResult>>; #[macro_export] macro_rules! assert_parses_to { ( $parser: expr, $input: expr, $expected: expr ) => {{ - let mut par = $parser(); - let result = par.parse($input.with_spans().into_atom_stream()).map(|x| x.0); // TODO: check remainder of stream. + let par = $parser(); + let result = par.skip(eof()).parse($input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, Ok($expected)); + }} +} + +/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function +/// against string input and expecting a certain result. +#[macro_export] +macro_rules! assert_edn_parses_to { + ( $parser: expr, $input: expr, $expected: expr ) => {{ + let par = $parser(); + let input = edn::parse::value($input).expect("to be able to parse input as EDN"); + let result = par.skip(eof()).parse(input.into_atom_stream()).map(|x| x.0); assert_eq!(result, Ok($expected)); }} } diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index f02d141a..877c64c2 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -35,6 +35,7 @@ use self::mentat_parser_utils::value_and_span::{ }; use self::mentat_query::{ + Binding, Element, FindQuery, FindSpec, @@ -50,7 +51,9 @@ use self::mentat_query::{ SrcVar, UnifyVars, Variable, + VariableOrPlaceholder, WhereClause, + WhereFn, }; error_chain! { @@ -223,6 +226,25 @@ def_parser!(Where, pred, WhereClause, { }))) }); +/// A vector containing a parenthesized function expression and a binding. +def_parser!(Where, where_fn, WhereClause, { + // Accept either a nested list or a nested vector here: + // `[(foo ?x ?y) binding]` or `[[foo ?x ?y] binding]` + vector() + .of_exactly( + (seq().of_exactly( + (Query::predicate_fn(), Query::arguments())), + Bind::binding()) + .map(|((f, args), binding)| { + WhereClause::WhereFn( + WhereFn { + operator: f.0, + args: args, + binding: binding, + }) + })) +}); + def_parser!(Where, pattern, WhereClause, { vector() .of_exactly( @@ -273,6 +295,7 @@ def_parser!(Where, clause, WhereClause, { try(Where::or_clause()), try(Where::pred()), + try(Where::where_fn()), ]) }); @@ -391,6 +414,51 @@ def_parser!(Find, query, FindQuery, { }) }); +pub struct Bind; + +def_matches_plain_symbol!(Bind, placeholder, "_"); + +def_matches_plain_symbol!(Bind, ellipsis, "..."); + +def_parser!(Bind, bind_scalar, Binding, { + Query::variable() + .skip(eof()) + .map(|var: Variable| -> Binding { Binding::BindScalar(var) }) +}); + +def_parser!(Bind, variable_or_placeholder, VariableOrPlaceholder, { + Query::variable().map(VariableOrPlaceholder::Variable) + .or(Bind::placeholder().map(|_| VariableOrPlaceholder::Placeholder)) +}); + +def_parser!(Bind, bind_coll, Binding, { + vector() + .of_exactly(Query::variable() + .skip(Bind::ellipsis())) + .map(Binding::BindColl) +}); + +def_parser!(Bind, bind_rel, Binding, { + vector().of_exactly( + many1::, _>(Bind::variable_or_placeholder()) + .map(Binding::BindRel)) +}); + +def_parser!(Bind, bind_tuple, Binding, { + many1::, _>(Bind::variable_or_placeholder()) + .skip(eof()) + .map(Binding::BindTuple) +}); + +def_parser!(Bind, binding, Binding, { + // Any one of the four binding types might apply, so we combine them with `choice`. Our parsers + // consume input, so we need to wrap them in `try` so that they operate independently. + choice([try(Bind::bind_scalar()), + try(Bind::bind_coll()), + try(Bind::bind_tuple()), + try(Bind::bind_rel())]) +}); + pub fn parse_find_string(string: &str) -> Result { let expr = edn::parse::value(string)?; Find::query() @@ -410,6 +478,7 @@ mod test { use self::combine::Parser; use self::edn::OrderedFloat; use self::mentat_query::{ + Binding, Element, FindSpec, NonIntegerConstant, @@ -418,6 +487,7 @@ mod test { PatternValuePlace, SrcVar, Variable, + VariableOrPlaceholder, }; use super::*; @@ -623,4 +693,86 @@ mod test { FindSpec::FindTuple(vec![Element::Variable(variable(vx)), Element::Variable(variable(vy))])); } + + #[test] + fn test_bind_scalar() { + let vx = edn::PlainSymbol::new("?x"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[?x]", + Binding::BindScalar(variable(vx))); + } + + #[test] + fn test_bind_coll() { + let vx = edn::PlainSymbol::new("?x"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[[?x ...]]", + Binding::BindColl(variable(vx))); + } + + #[test] + fn test_bind_rel() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let vw = edn::PlainSymbol::new("?w"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[[?x ?y _ ?w]]", + Binding::BindRel(vec![VariableOrPlaceholder::Variable(variable(vx)), + VariableOrPlaceholder::Variable(variable(vy)), + VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(variable(vw)), + ])); + } + + #[test] + fn test_bind_tuple() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let vw = edn::PlainSymbol::new("?w"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[?x ?y _ ?w]", + Binding::BindTuple(vec![VariableOrPlaceholder::Variable(variable(vx)), + VariableOrPlaceholder::Variable(variable(vy)), + VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(variable(vw)), + ])); + } + + #[test] + fn test_where_fn() { + assert_edn_parses_to!(Where::where_fn, + "[(f ?x 1) ?y]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![FnArg::Variable(Variable::from_valid_name("?x")), + FnArg::EntidOrInteger(1)], + binding: Binding::BindScalar(Variable::from_valid_name("?y")), + })); + + assert_edn_parses_to!(Where::where_fn, + "[(f ?x) [?y ...]]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![FnArg::Variable(Variable::from_valid_name("?x"))], + binding: Binding::BindColl(Variable::from_valid_name("?y")), + })); + + // assert_edn_parses_to!(Where::where_fn, + // "[(f) [?y _]]", + // WhereClause::WhereFn(WhereFn { + // operator: edn::PlainSymbol::new("f"), + // args: vec![], + // binding: Binding::BindRel(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?y")), + // VariableOrPlaceholder::Placeholder]), + // })); + + assert_edn_parses_to!(Where::where_fn, + "[(f) _ ?y]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![], + binding: Binding::BindTuple(vec![VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(Variable::from_valid_name("?y"))]), + })); + } } diff --git a/query/src/lib.rs b/query/src/lib.rs index b9037117..8c7eddb6 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -455,6 +455,25 @@ impl FindSpec { } } +// Datomic accepts variable or placeholder. DataScript accepts recursive bindings. Mentat sticks +// to the non-recursive form Datomic accepts, which is much simpler to process. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum VariableOrPlaceholder { + Placeholder, + Variable(Variable), +} + +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum Binding { + BindRel(Vec), + + BindColl(Variable), + + BindTuple(Vec), + + BindScalar(Variable), +} + // Note that the "implicit blank" rule applies. // A pattern with a reversed attribute — :foo/_bar — is reversed // at the point of parsing. These `Pattern` instances only represent @@ -510,6 +529,13 @@ pub struct Predicate { pub args: Vec, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct WhereFn { + pub operator: PlainSymbol, + pub args: Vec, + pub binding: Binding, +} + #[derive(Clone, Debug, Eq, PartialEq)] pub enum UnifyVars { /// `Implicit` means the variables in an `or` or `not` are derived from the enclosed pattern. @@ -577,7 +603,7 @@ pub enum WhereClause { NotJoin, OrJoin(OrJoin), Pred(Predicate), - WhereFn, + WhereFn(WhereFn), RuleExpr, Pattern(Pattern), } @@ -630,7 +656,7 @@ impl ContainsVariables for WhereClause { &Pattern(ref p) => p.accumulate_mentioned_variables(acc), &Not => (), &NotJoin => (), - &WhereFn => (), + &WhereFn(_) => (), &RuleExpr => (), } } From 08cae4bc7c63350bd3442968e88af6fa0410c29e Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 5 Apr 2017 11:01:47 -0700 Subject: [PATCH 15/18] Review comment: Find, not Bind. --- query-parser/src/parse.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 877c64c2..1a5870ef 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -310,6 +310,8 @@ def_matches_plain_symbol!(Find, period, "."); def_matches_plain_symbol!(Find, ellipsis, "..."); +def_matches_plain_symbol!(Find, placeholder, "_"); + def_parser!(Find, find_scalar, FindSpec, { Query::variable() .skip(Find::period()) @@ -416,10 +418,6 @@ def_parser!(Find, query, FindQuery, { pub struct Bind; -def_matches_plain_symbol!(Bind, placeholder, "_"); - -def_matches_plain_symbol!(Bind, ellipsis, "..."); - def_parser!(Bind, bind_scalar, Binding, { Query::variable() .skip(eof()) @@ -428,13 +426,13 @@ def_parser!(Bind, bind_scalar, Binding, { def_parser!(Bind, variable_or_placeholder, VariableOrPlaceholder, { Query::variable().map(VariableOrPlaceholder::Variable) - .or(Bind::placeholder().map(|_| VariableOrPlaceholder::Placeholder)) + .or(Find::placeholder().map(|_| VariableOrPlaceholder::Placeholder)) }); def_parser!(Bind, bind_coll, Binding, { vector() .of_exactly(Query::variable() - .skip(Bind::ellipsis())) + .skip(Find::ellipsis())) .map(Binding::BindColl) }); From 62fda71fbcfd61e838edda54e059f36ae2288dc6 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 5 Apr 2017 11:02:09 -0700 Subject: [PATCH 16/18] Part 2: Implement apply_fulltext and column constraints. No bindings, yet. --- query-algebrizer/src/clauses/mod.rs | 6 +- query-algebrizer/src/clauses/pattern.rs | 3 +- query-algebrizer/src/clauses/predicate.rs | 161 +++++++++++++++++++++- query-algebrizer/src/clauses/resolve.rs | 2 +- query-algebrizer/src/errors.rs | 4 +- query-algebrizer/src/lib.rs | 2 + query-algebrizer/src/types.rs | 44 +++++- query-sql/src/lib.rs | 53 ++++++- query-translator/src/translate.rs | 21 +++ query-translator/tests/translate.rs | 19 ++- query/src/lib.rs | 3 + 11 files changed, 301 insertions(+), 17 deletions(-) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 1872b8f9..eaa3f869 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -96,6 +96,7 @@ fn unit_type_set(t: ValueType) -> HashSet { /// /// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`. /// - Predicate clauses turn into the same, but with other functions. +/// - Function clauses turn `WHERE` parts using function-specific comparisons. /// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to /// bind it to the outer variables, or adds simple `WHERE` clauses to the outer /// clause. @@ -581,6 +582,9 @@ impl ConjoiningClauses { WhereClause::Pred(p) => { self.apply_predicate(schema, p) }, + WhereClause::WhereFn(f) => { + self.apply_where_fn(schema, f) + }, WhereClause::OrJoin(o) => { validate_or_join(&o) //?; @@ -606,4 +610,4 @@ fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { #[cfg(test)] pub fn ident(ns: &str, name: &str) -> PatternNonValuePlace { PatternNonValuePlace::Ident(::std::rc::Rc::new(NamespacedKeyword::new(ns, name))) -} \ No newline at end of file +} diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index cede4ef1..fcdd480e 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -8,8 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::rc::Rc; - use mentat_core::{ Schema, TypedValue, @@ -268,6 +266,7 @@ mod testing { use super::*; use std::collections::BTreeMap; + use std::rc::Rc; use mentat_core::attribute::Unique; use mentat_core::{ diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index 4242d9a0..cf9bb946 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -10,35 +10,45 @@ use mentat_core::{ Schema, + TypedValue, }; use mentat_query::{ + FnArg, + NonIntegerConstant, Predicate, + SrcVar, + WhereFn, }; use clauses::ConjoiningClauses; use errors::{ - Result, + Error, ErrorKind, + Result, }; use types::{ ColumnConstraint, + DatomsColumn, + DatomsTable, + FulltextColumn, + FulltextQualifiedAlias, NumericComparison, + QualifiedAlias, + QueryValue, }; /// Application of predicates. impl ConjoiningClauses { - /// There are several kinds of predicates/functions in our Datalog: + /// There are several kinds of predicates in our Datalog: /// - A limited set of binary comparison operators: < > <= >= !=. /// These are converted into SQLite binary comparisons and some type constraints. - /// - A set of predicates like `fulltext` and `get-else` that are translated into - /// SQL `MATCH`es or joins, yielding bindings. /// - In the future, some predicates that are implemented via function calls in SQLite. /// /// At present we have implemented only the five built-in comparison binary operators. - pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { + pub fn apply_predicate<'s>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { // Because we'll be growing the set of built-in predicates, handling each differently, // and ultimately allowing user-specified predicates, we match on the predicate name first. if let Some(op) = NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) { @@ -53,7 +63,7 @@ impl ConjoiningClauses { /// - Ensures that the predicate functions name a known operator. /// - Accumulates a `NumericInequality` constraint into the `wheres` list. #[allow(unused_variables)] - pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { + pub fn apply_numeric_predicate<'s>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { if predicate.args.len() != 2 { bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), predicate.args.len(), 2)); } @@ -81,6 +91,96 @@ impl ConjoiningClauses { self.wheres.add_intersection(constraint); Ok(()) } + + + /// There are several kinds of functions binding variables in our Datalog: + /// - A set of functions like `fulltext` and `get-else` that are translated into + /// SQL `MATCH`es or joins, yielding bindings. + /// - In the future, some functions that are implemented via function calls in SQLite. + /// + /// At present we have implemented only the `fulltext` operator. + pub fn apply_where_fn<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + // Because we'll be growing the set of built-in functions, handling each differently, and + // ultimately allowing user-specified functions, we match on the function name first. + match where_fn.operator.0.as_str() { + "fulltext" => self.apply_fulltext(schema, where_fn), + _ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())), + } + } + + /// This function: + /// - Resolves variables and converts types to those more amenable to SQL. + /// - Ensures that the predicate functions name a known operator. + /// - Accumulates a `NumericInequality` constraint into the `wheres` list. + #[allow(unused_variables)] + pub fn apply_fulltext<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + if where_fn.args.len() != 3 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3)); + } + + // Go from arguments -- parser output -- to columns or values. + // Any variables that aren't bound by this point in the linear processing of clauses will + // cause the application of the predicate to fail. + let mut args = where_fn.args.into_iter(); + + // TODO: process source variables. + match args.next().unwrap() { + FnArg::SrcVar(SrcVar::DefaultSrc) => {}, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)), + } + + // TODO: accept placeholder and set of attributes. Alternately, consider putting the search + // term before the attribute arguments and collect the (variadic) attributes into a set. + // let a: Entid = self.resolve_attribute_argument(&where_fn.operator, 1, args.next().unwrap())?; + // + // TODO: allow non-constant attributes. + // TODO: improve the expression of this matching, possibly by using attribute_for_* uniformly. + let a = match args.next().unwrap() { + FnArg::Ident(i) => schema.get_entid(&i), + // Must be an entid. + FnArg::EntidOrInteger(e) => Some(e), + _ => None, + }; + + let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + + let fulltext_values = DatomsTable::FulltextValues; + let datoms_table = DatomsTable::Datoms; + + let fulltext_values_alias = (self.aliaser)(fulltext_values); + let datoms_table_alias = (self.aliaser)(datoms_table); + + // TODO: constrain types in more general cases? + self.constrain_attribute(datoms_table_alias.clone(), a); + + self.wheres.add_intersection(ColumnConstraint::Equals( + QualifiedAlias(datoms_table_alias, DatomsColumn::Value), + QueryValue::FulltextColumn(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Rowid)))); + + // search is either text or a variable. + // TODO: should this just use `resolve_argument`? Should it add a new `resolve_*` function? + let search = match args.next().unwrap() { + FnArg::Variable(var) => { + self.column_bindings + .get(&var) + .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) + .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))? + }, + FnArg::Constant(NonIntegerConstant::Text(s)) => { + QueryValue::TypedValue(TypedValue::typed_string(s.as_str())) + }, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)), + }; + + // TODO: should we build the FQA in ::Matches, preventing nonsense like matching on ::Rowid? + let constraint = ColumnConstraint::Matches(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Text), search); + self.wheres.add_intersection(constraint); + + // TODO: process bindings! + + Ok(()) + } } #[cfg(test)] @@ -88,6 +188,8 @@ mod testing { use super::*; use std::collections::HashSet; + use std::rc::Rc; + use mentat_core::attribute::Unique; use mentat_core::{ Attribute, @@ -96,12 +198,14 @@ mod testing { }; use mentat_query::{ + Binding, FnArg, NamespacedKeyword, Pattern, PatternNonValuePlace, PatternValuePlace, PlainSymbol, + SrcVar, Variable, }; @@ -229,4 +333,47 @@ mod testing { .collect(), ValueType::String)); } -} \ No newline at end of file + + #[test] + fn test_apply_fulltext() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); + + let op = PlainSymbol::new("fulltext"); + cc.apply_fulltext(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::SrcVar(SrcVar::DefaultSrc), + FnArg::Ident(NamespacedKeyword::new("foo", "fts")), + FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))), + ], + binding: Binding::BindScalar(Variable::from_valid_name("?z")), + }).expect("to be able to apply_fulltext"); + + assert!(!cc.is_known_empty); + + // Finally, expand column bindings. + cc.expand_column_bindings(); + assert!(!cc.is_known_empty); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 3); + + assert_eq!(clauses.0[0], ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute), + QueryValue::Entid(100)).into()); + assert_eq!(clauses.0[1], ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value), + QueryValue::FulltextColumn(FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Rowid))).into()); + assert_eq!(clauses.0[2], ColumnConstraint::Matches(FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Text), + QueryValue::TypedValue(TypedValue::String(Rc::new("needle".into())))).into()); + + // TODO: make assertions about types of columns. + } +} diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs index aba0ba35..857e5af1 100644 --- a/query-algebrizer/src/clauses/resolve.rs +++ b/query-algebrizer/src/clauses/resolve.rs @@ -56,7 +56,7 @@ impl ConjoiningClauses { Constant(NonIntegerConstant::Text(_)) | Constant(NonIntegerConstant::BigInteger(_)) => { self.mark_known_empty(EmptyBecause::NonNumericArgument); - bail!(ErrorKind::NonNumericArgument(function.clone(), position)); + bail!(ErrorKind::InvalidArgument(function.clone(), "numeric".into(), position)); }, Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), } diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index dff08556..d6b4b333 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -35,9 +35,9 @@ error_chain! { display("unbound variable: {}", name) } - NonNumericArgument(function: PlainSymbol, position: usize) { + InvalidArgument(function: PlainSymbol, expected_type: String, position: usize) { description("invalid argument") - display("invalid argument to {}: expected numeric in position {}.", function, position) + display("invalid argument to {}: expected {} in position {}.", function, expected_type, position) } NonMatchingVariablesInOrClause { diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index a0e90db3..e386e108 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -99,6 +99,8 @@ pub use types::{ ColumnIntersection, DatomsColumn, DatomsTable, + FulltextColumn, + FulltextQualifiedAlias, QualifiedAlias, QueryValue, SourceAlias, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 121e5f85..a640c647 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -71,6 +71,23 @@ impl DatomsColumn { } } +/// One of the named columns of our fulltext values table. +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum FulltextColumn { + Rowid, + Text, +} + +impl FulltextColumn { + pub fn as_str(&self) -> &'static str { + use self::FulltextColumn::*; + match *self { + Rowid => "rowid", + Text => "text", + } + } +} + /// A specific instance of a table within a query. E.g., "datoms123". pub type TableAlias = String; @@ -94,6 +111,16 @@ impl Debug for QualifiedAlias { } } +/// A particular column of a particular aliased fulltext table. E.g., "fulltext_values123", Rowid. +#[derive(PartialEq, Eq, Clone)] +pub struct FulltextQualifiedAlias(pub TableAlias, pub FulltextColumn); + +impl Debug for FulltextQualifiedAlias { + fn fmt(&self, f: &mut Formatter) -> Result { + write!(f, "{}.{}", self.0, self.1.as_str()) + } +} + impl QualifiedAlias { pub fn for_type_tag(&self) -> QualifiedAlias { QualifiedAlias(self.0.clone(), DatomsColumn::ValueTypeTag) @@ -103,6 +130,7 @@ impl QualifiedAlias { #[derive(PartialEq, Eq)] pub enum QueryValue { Column(QualifiedAlias), + FulltextColumn(FulltextQualifiedAlias), Entid(Entid), TypedValue(TypedValue), @@ -120,6 +148,9 @@ impl Debug for QueryValue { &Column(ref qa) => { write!(f, "{:?}", qa) }, + &FulltextColumn(ref qa) => { + write!(f, "{:?}", qa) + }, &Entid(ref entid) => { write!(f, "entity({:?})", entid) }, @@ -192,6 +223,9 @@ pub enum ColumnConstraint { right: QueryValue, }, HasType(TableAlias, ValueType), + // TODO: Merge this with NumericInequality? I expect the fine-grained information to be + // valuable when optimizing. + Matches(FulltextQualifiedAlias, QueryValue), } #[derive(PartialEq, Eq, Debug)] @@ -290,6 +324,10 @@ impl Debug for ColumnConstraint { write!(f, "{:?} {:?} {:?}", left, operator, right) }, + &Matches(ref qa, ref thing) => { + write!(f, "{:?} MATCHES {:?}", qa, thing) + }, + &HasType(ref qa, value_type) => { write!(f, "{:?}.value_type_tag = {:?}", qa, value_type) }, @@ -301,6 +339,7 @@ impl Debug for ColumnConstraint { pub enum EmptyBecause { // Var, existing, desired. TypeMismatch(Variable, HashSet, ValueType), + NonAttributeArgument, NonNumericArgument, NonStringFulltextValue, UnresolvedIdent(NamespacedKeyword), @@ -319,6 +358,9 @@ impl Debug for EmptyBecause { write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", var, desired, existing) }, + &NonAttributeArgument => { + write!(f, "Non-attribute argument in attribute place") + }, &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, @@ -346,4 +388,4 @@ impl Debug for EmptyBecause { }, } } -} \ No newline at end of file +} diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 1d3d7869..cc7c1e1f 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -20,6 +20,8 @@ use mentat_core::{ use mentat_query_algebrizer::{ DatomsColumn, + FulltextColumn, + FulltextQualifiedAlias, QualifiedAlias, QueryValue, SourceAlias, @@ -44,6 +46,7 @@ use mentat_sql::{ /// implementation for each storage backend. Passing `TypedValue`s here allows for that. pub enum ColumnOrExpression { Column(QualifiedAlias), + FulltextColumn(FulltextQualifiedAlias), Entid(Entid), // Because it's so common. Integer(i32), // We use these for type codes etc. Long(i64), @@ -55,6 +58,7 @@ impl From for ColumnOrExpression { fn from(v: QueryValue) -> Self { match v { QueryValue::Column(c) => ColumnOrExpression::Column(c), + QueryValue::FulltextColumn(c) => ColumnOrExpression::FulltextColumn(c), QueryValue::Entid(e) => ColumnOrExpression::Entid(e), QueryValue::PrimitiveLong(v) => ColumnOrExpression::Long(v), QueryValue::TypedValue(v) => ColumnOrExpression::Value(v), @@ -109,6 +113,14 @@ impl Constraint { right: right, } } + + pub fn fulltext_match(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint { + Constraint::Infix { + op: Op("MATCH"), // SQLite specific! + left: left, + right: right, + } + } } #[allow(dead_code)] @@ -157,6 +169,11 @@ fn push_column(qb: &mut QueryBuilder, col: &DatomsColumn) { qb.push_sql(col.as_str()); } +// We know that FulltextColumns are safe to serialize. +fn push_fulltext_column(qb: &mut QueryBuilder, col: &FulltextColumn) { + qb.push_sql(col.as_str()); +} + //--------------------------------------------------------- // Turn that representation into SQL. @@ -199,6 +216,12 @@ impl QueryFragment for ColumnOrExpression { push_column(out, column); Ok(()) }, + &FulltextColumn(FulltextQualifiedAlias(ref table, ref column)) => { + out.push_identifier(table.as_str())?; + out.push_sql("."); + push_fulltext_column(out, column); + Ok(()) + }, &Entid(entid) => { out.push_sql(entid.to_string().as_str()); Ok(()) @@ -406,13 +429,20 @@ impl SelectQuery { #[cfg(test)] mod tests { use super::*; + + use std::rc::Rc; + use mentat_query_algebrizer::DatomsTable; - fn build_constraint(c: Constraint) -> String { + fn build_constraint_query(c: Constraint) -> SQLQuery { let mut builder = SQLiteQueryBuilder::new(); c.push_sql(&mut builder) .map(|_| builder.finish()) - .unwrap().sql + .expect("to produce a query for the given constraint") + } + + fn build_constraint(c: Constraint) -> String { + build_constraint_query(c).sql } #[test] @@ -469,6 +499,25 @@ mod tests { assert_eq!("((123 = 456 AND 789 = 246))", build_constraint(c)); } + #[test] + fn test_matches_constraint() { + let c = Constraint::Infix { + op: Op("MATCHES"), + left: ColumnOrExpression::FulltextColumn(FulltextQualifiedAlias("fulltext01".to_string(), FulltextColumn::Text)), + right: ColumnOrExpression::Value(TypedValue::String(Rc::new("needle".to_string()))), + }; + let q = build_constraint_query(c); + assert_eq!("`fulltext01`.text MATCHES $v0", q.sql); + assert_eq!(vec![("$v0".to_string(), Rc::new("needle".to_string()))], q.args); + + let c = Constraint::Infix { + op: Op("="), + left: ColumnOrExpression::FulltextColumn(FulltextQualifiedAlias("fulltext01".to_string(), FulltextColumn::Rowid)), + right: ColumnOrExpression::Column(QualifiedAlias("datoms02".to_string(), DatomsColumn::Value)), + }; + assert_eq!("`fulltext01`.rowid = `datoms02`.v", build_constraint(c)); + } + #[test] fn test_end_to_end() { // [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]] diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index e6b4b375..aa17a3d8 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -32,6 +32,8 @@ use mentat_query_algebrizer::{ ConjoiningClauses, DatomsColumn, DatomsTable, + FulltextColumn, + FulltextQualifiedAlias, QualifiedAlias, QueryValue, SourceAlias, @@ -69,6 +71,12 @@ impl ToColumn for QualifiedAlias { } } +impl ToColumn for FulltextQualifiedAlias { + fn to_column(self) -> ColumnOrExpression { + ColumnOrExpression::FulltextColumn(self) + } +} + impl ToConstraint for ColumnIntersection { fn to_constraint(self) -> Constraint { Constraint::And { @@ -108,6 +116,11 @@ impl ToConstraint for ColumnConstraint { Equals(left, QueryValue::Column(right)) => Constraint::equal(left.to_column(), right.to_column()), + Equals(left, QueryValue::FulltextColumn(right)) => + // TODO: figure out if this is the correct abstraction. Can we make it so that + // FulltextColumns::Text is not accepted here? + Constraint::equal(left.to_column(), right.to_column()), + Equals(qa, QueryValue::PrimitiveLong(value)) => { let tag_column = qa.for_type_tag().to_column(); let value_column = qa.to_column(); @@ -148,6 +161,14 @@ impl ToConstraint for ColumnConstraint { } }, + Matches(left, right) => { + Constraint::Infix { + op: Op("MATCH"), + left: ColumnOrExpression::FulltextColumn(left), + right: right.into(), + } + }, + HasType(table, value_type) => { let column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column(); Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag())) diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 29ad6c75..5cdbf722 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -58,6 +58,13 @@ fn prepopulated_schema() -> Schema { value_type: ValueType::String, ..Default::default() }); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); schema } @@ -241,4 +248,14 @@ fn test_numeric_not_equals_known_attribute() { let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1"); assert_eq!(args, vec![]); -} \ No newline at end of file +} + +#[test] +fn test_fulltext() { + let schema = prepopulated_schema(); + + let input = r#"[:find ?x . :where [(fulltext $ :foo/fts "yyy") [?x]]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "yyy")]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index 8c7eddb6..857502d0 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -192,7 +192,10 @@ impl FromValue for FnArg { println!("from_value {}", v.inner); match v.inner { edn::SpannedValue::Integer(i) => Some(FnArg::EntidOrInteger(i)), + edn::SpannedValue::Boolean(b) => Some(FnArg::Constant(NonIntegerConstant::Boolean(b))), + edn::SpannedValue::BigInteger(x) => Some(FnArg::Constant(NonIntegerConstant::BigInteger(x))), edn::SpannedValue::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))), + edn::SpannedValue::Text(ref s) => Some(FnArg::Constant(NonIntegerConstant::Text(Rc::new(s.clone())))), _ => unimplemented!(), }}) } From 6544ca15947bea55199b3b34162c1ae75b0bd6e1 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 5 Apr 2017 15:30:22 -0700 Subject: [PATCH 17/18] Pre: Handle SrcVar. --- query/src/lib.rs | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/query/src/lib.rs b/query/src/lib.rs index 857502d0..3d1aad1f 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -147,7 +147,11 @@ impl FromValue for SrcVar { impl SrcVar { pub fn from_symbol(sym: &PlainSymbol) -> Option { if sym.is_src_symbol() { - Some(SrcVar::NamedSrc(sym.plain_name().to_string())) + if sym.0 == "$" { + Some(SrcVar::DefaultSrc) + } else { + Some(SrcVar::NamedSrc(sym.plain_name().to_string())) + } } else { None } @@ -185,19 +189,34 @@ pub enum FnArg { impl FromValue for FnArg { fn from_value(v: edn::ValueAndSpan) -> Option { - // TODO: support SrcVars. - Variable::from_value(v.clone()) // TODO: don't clone! - .and_then(|v| Some(FnArg::Variable(v))) - .or_else(|| { - println!("from_value {}", v.inner); - match v.inner { - edn::SpannedValue::Integer(i) => Some(FnArg::EntidOrInteger(i)), - edn::SpannedValue::Boolean(b) => Some(FnArg::Constant(NonIntegerConstant::Boolean(b))), - edn::SpannedValue::BigInteger(x) => Some(FnArg::Constant(NonIntegerConstant::BigInteger(x))), - edn::SpannedValue::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))), - edn::SpannedValue::Text(ref s) => Some(FnArg::Constant(NonIntegerConstant::Text(Rc::new(s.clone())))), - _ => unimplemented!(), - }}) + use edn::SpannedValue::*; + match v.inner { + Integer(x) => + Some(FnArg::EntidOrInteger(x)), + PlainSymbol(ref x) if x.is_src_symbol() => + SrcVar::from_symbol(x).map(FnArg::SrcVar), + PlainSymbol(ref x) if x.is_var_symbol() => + Variable::from_symbol(x).map(FnArg::Variable), + PlainSymbol(_) => None, + NamespacedKeyword(ref x) => + Some(FnArg::Ident(x.clone())), + Boolean(x) => + Some(FnArg::Constant(NonIntegerConstant::Boolean(x))), + Float(x) => + Some(FnArg::Constant(NonIntegerConstant::Float(x))), + BigInteger(ref x) => + Some(FnArg::Constant(NonIntegerConstant::BigInteger(x.clone()))), + Text(ref x) => + // TODO: intern strings. #398. + Some(FnArg::Constant(NonIntegerConstant::Text(Rc::new(x.clone())))), + Nil | + NamespacedSymbol(_) | + Keyword(_) | + Vector(_) | + List(_) | + Set(_) | + Map(_) => None, + } } } From 71d3aa29ed3b383f030e9b3d13eeef5a12820be1 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 5 Apr 2017 15:31:05 -0700 Subject: [PATCH 18/18] Part 3: Start binding fulltext values. --- query-algebrizer/src/clauses/predicate.rs | 98 ++++++++++++++++++++++- query-projector/src/lib.rs | 4 +- query-translator/tests/translate.rs | 6 +- 3 files changed, 99 insertions(+), 9 deletions(-) diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index cf9bb946..9beaf1d0 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -11,13 +11,16 @@ use mentat_core::{ Schema, TypedValue, + ValueType, }; use mentat_query::{ + Binding, FnArg, NonIntegerConstant, Predicate, SrcVar, + VariableOrPlaceholder, WhereFn, }; @@ -118,6 +121,17 @@ impl ConjoiningClauses { bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3)); } + // TODO: binding-specific error messages. + let mut bindings = match where_fn.binding { + Binding::BindRel(bindings) => { + if bindings.len() > 4 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), bindings.len(), 4)); + } + bindings.into_iter() + }, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "bindings".into(), 999)), + }; + // Go from arguments -- parser output -- to columns or values. // Any variables that aren't bound by this point in the linear processing of clauses will // cause the application of the predicate to fail. @@ -155,7 +169,7 @@ impl ConjoiningClauses { self.constrain_attribute(datoms_table_alias.clone(), a); self.wheres.add_intersection(ColumnConstraint::Equals( - QualifiedAlias(datoms_table_alias, DatomsColumn::Value), + QualifiedAlias(datoms_table_alias.clone(), DatomsColumn::Value), QueryValue::FulltextColumn(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Rowid)))); // search is either text or a variable. @@ -177,7 +191,61 @@ impl ConjoiningClauses { let constraint = ColumnConstraint::Matches(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Text), search); self.wheres.add_intersection(constraint); - // TODO: process bindings! + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::Ref); + + let entity_alias = QualifiedAlias(datoms_table_alias.clone(), DatomsColumn::Entity); + self.column_bindings.entry(var).or_insert(vec![]).push(entity_alias); + } + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::String); + + // TODO: figure out how to represent a FulltextQualifiedAlias. + // let value_alias = FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Text); + // self.column_bindings.entry(var).or_insert(vec![]).push(value_alias); + } + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::Ref); + + let tx_alias = QualifiedAlias(datoms_table_alias.clone(), DatomsColumn::Tx); + self.column_bindings.entry(var).or_insert(vec![]).push(tx_alias); + } + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::Double); + + // TODO: produce this using SQLite's matchinfo. + self.value_bindings.insert(var.clone(), TypedValue::Double(0.0.into())); + + // TODO: figure out how to represent a constant binding. + // self.column_bindings.entry(var).or_insert(vec![]).push(score_alias); + } Ok(()) } @@ -207,6 +275,7 @@ mod testing { PlainSymbol, SrcVar, Variable, + VariableOrPlaceholder, }; use clauses::{ @@ -355,7 +424,10 @@ mod testing { FnArg::Ident(NamespacedKeyword::new("foo", "fts")), FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))), ], - binding: Binding::BindScalar(Variable::from_valid_name("?z")), + binding: Binding::BindRel(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?entity")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?value")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?tx")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?score"))]), }).expect("to be able to apply_fulltext"); assert!(!cc.is_known_empty); @@ -374,6 +446,24 @@ mod testing { assert_eq!(clauses.0[2], ColumnConstraint::Matches(FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Text), QueryValue::TypedValue(TypedValue::String(Rc::new("needle".into())))).into()); - // TODO: make assertions about types of columns. + let bindings = cc.column_bindings; + assert_eq!(bindings.len(), 2); + + assert_eq!(bindings.get(&Variable::from_valid_name("?entity")).expect("column binding for ?entity").clone(), + vec![QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity)]); + assert_eq!(bindings.get(&Variable::from_valid_name("?tx")).expect("column binding for ?tx").clone(), + vec![QualifiedAlias("datoms01".to_string(), DatomsColumn::Tx)]); + + let known_types = cc.known_types; + assert_eq!(known_types.len(), 4); + + assert_eq!(known_types.get(&Variable::from_valid_name("?entity")).expect("known types for ?entity").clone(), + vec![ValueType::Ref].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?value")).expect("known types for ?value").clone(), + vec![ValueType::String].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?tx")).expect("known types for ?tx").clone(), + vec![ValueType::Ref].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?score")).expect("known types for ?score").clone(), + vec![ValueType::Double].into_iter().collect()); } } diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index f1d94596..1df7a66e 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -210,7 +210,7 @@ fn project_elements<'a, I: IntoIterator>( let columns = query.cc .column_bindings .get(var) - .expect("Every variable has a binding"); + .expect(format!("Every variable should have a binding, but {} does not", var.as_str()).as_str()); let qa = columns[0].clone(); let name = column_name(var); @@ -490,4 +490,4 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection { }, } } -} \ No newline at end of file +} diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 5cdbf722..d4786c50 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -254,8 +254,8 @@ fn test_numeric_not_equals_known_attribute() { fn test_fulltext() { let schema = prepopulated_schema(); - let input = r#"[:find ?x . :where [(fulltext $ :foo/fts "yyy") [?x]]]"#; + let input = r#"[:find ?entity ?value ?tx ?score :where [(fulltext $ :foo/fts "needle") [?entity ?value ?tx ?score]]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); - assert_eq!(args, vec![make_arg("$v0", "yyy")]); + assert_eq!(sql, "SELECT `datoms00`.e AS `?entity`, `datoms00`.v AS `?value`, `datoms00`.tx AS `?tx`, 0.0 AS `?score` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "needle")]); }