Improve parsing of nested edn::ValueAndSpan streams. r=rnewman (#393)

* Pre: Expose more in edn.

* Pre: Make it easier to work with ValueAndSpan.

with_spans() is a temporary hack, needed only because I don't care to
parse the bootstrap assertions from text right now.

* Part 1a: Add `value_and_span` for parsing nested `edn::ValueAndSpan` instances.

I wasn't able to abstract over `edn::Value` and `edn::ValueAndSpan`;
there are multiple obstacles.  I chose to roll with
`edn::ValueAndSpan` since it exposes the additional span information
that we will want to form good error messages in the future.

* Part 1b: Add keyword_map() parsing an `edn::Value::Vector` into an `edn::Value::Map`.

* Part 1c: Add `Log`/`.log(...)` for logging parser progress.

This is a terrible hack, but it sure helps to debug complicated nested
parsers.  I don't even know what a principled approach would look
like; since our parser combinators are so frequently expressed in
code, it's hard to imagine a data-driven interpreter that can help
debug things.

* Part 2: Use `value_and_span` apparatus in tx-parser/.

I break an abstraction boundary by returning a value column
`edn::ValueAndSpan` rather than just an `edn::Value`.  That is, the
transaction processor shouldn't care where the `edn::Value` it is
processing arose -- even if we care to track that information we should
bake it into the `Entity` type.  We do this because we need to
dynamically parse the value column to support nested maps, and parsing
requires a full `edn::ValueAndSpan`.  Alternately, we could cheat and
fake the spans when parsing nested maps, but that's potentially
expensive.

* Part 3: Use `value_and_span` apparatus in query-parser/.

* Part 4: Use `value_and_span` apparatus in root crate.

* Review comment: Make Span and SpanPosition Copy.

* Review comment: nits.

* Review comment: Make `or` be `or_exactly`.

I baked the eof checking directly into the parser, rather than using
the skip and eof parsers.  I also took the time to restore some tests
that were mistakenly commented out.

* Review comment: Extract and use def_matches_* macros.

* Review comment: .map() as late as possible.
This commit is contained in:
Nick Alexander 2017-04-06 10:06:28 -07:00 committed by GitHub
parent a5023c70cb
commit 5369f03464
20 changed files with 1101 additions and 1016 deletions

View file

@ -273,6 +273,6 @@ pub fn bootstrap_entities() -> Vec<Entity> {
// Failure here is a coding error (since the inputs are fixed), not a runtime error.
// TODO: represent these bootstrap data errors rather than just panicking.
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(&[bootstrap_assertions][..]).unwrap();
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(bootstrap_assertions.with_spans()).unwrap();
return bootstrap_entities;
}

View file

@ -1125,8 +1125,8 @@ mod tests {
fn transact<I>(&mut self, transaction: I) -> Result<TxReport> where I: Borrow<str> {
// Failure to parse the transaction is a coding error, so we unwrap.
let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str()).without_spans();
let entities: Vec<_> = mentat_tx_parser::Tx::parse(&[assertions.clone()][..]).expect(format!("to be able to parse {} into entities", assertions).as_str());
let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str());
let entities: Vec<_> = mentat_tx_parser::Tx::parse(assertions.clone()).expect(format!("to be able to parse {} into entities", assertions).as_str());
let details = {
// The block scopes the borrow of self.sqlite.

View file

@ -254,13 +254,13 @@ impl<'conn, 'a> Tx<'conn, 'a> {
let v = match v {
entmod::AtomOrLookupRefOrVectorOrMapNotation::Atom(v) => {
if attribute.value_type == ValueType::Ref && v.is_text() {
Either::Right(LookupRefOrTempId::TempId(temp_ids.intern(v.as_text().cloned().map(TempId::External).unwrap())))
if attribute.value_type == ValueType::Ref && v.inner.is_text() {
Either::Right(LookupRefOrTempId::TempId(temp_ids.intern(v.inner.as_text().cloned().map(TempId::External).unwrap())))
} else {
// Here is where we do schema-aware typechecking: we either assert that
// the given value is in the attribute's value set, or (in limited
// cases) coerce the value into the attribute's value set.
let typed_value: TypedValue = self.schema.to_typed_value(&v, &attribute)?;
let typed_value: TypedValue = self.schema.to_typed_value(&v.without_spans(), &attribute)?;
Either::Left(typed_value)
}
},

View file

@ -32,7 +32,7 @@ pub nil -> ValueAndSpan =
start:#position "nil" end:#position {
ValueAndSpan {
inner: SpannedValue::Nil,
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -40,7 +40,7 @@ pub nan -> ValueAndSpan =
start:#position "#f" whitespace+ "NaN" end:#position {
ValueAndSpan {
inner: SpannedValue::Float(OrderedFloat(NAN)),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -48,7 +48,7 @@ pub infinity -> ValueAndSpan =
start:#position "#f" whitespace+ s:$(sign) "Infinity" end:#position {
ValueAndSpan {
inner: SpannedValue::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY })),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -56,13 +56,13 @@ pub boolean -> ValueAndSpan =
start:#position "true" end:#position {
ValueAndSpan {
inner: SpannedValue::Boolean(true),
span: Span(start, end)
span: Span::new(start, end)
}
} /
start:#position "false" end:#position {
ValueAndSpan {
inner: SpannedValue::Boolean(false),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -77,7 +77,7 @@ pub bigint -> ValueAndSpan =
start:#position b:$( sign? digit+ ) "N" end:#position {
ValueAndSpan {
inner: SpannedValue::BigInteger(b.parse::<BigInt>().unwrap()),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -85,7 +85,7 @@ pub octalinteger -> ValueAndSpan =
start:#position "0" i:$( octaldigit+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i64::from_str_radix(i, 8).unwrap()),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -93,7 +93,7 @@ pub hexinteger -> ValueAndSpan =
start:#position "0x" i:$( hex+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i64::from_str_radix(i, 16).unwrap()),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -102,7 +102,7 @@ pub basedinteger -> ValueAndSpan =
start:#position b:$( validbase ) "r" i:$( alphanumeric+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i64::from_str_radix(i, b.parse::<u32>().unwrap()).unwrap()),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -110,7 +110,7 @@ pub integer -> ValueAndSpan =
start:#position i:$( sign? digit+ ) end:#position {
ValueAndSpan {
inner: SpannedValue::Integer(i.parse::<i64>().unwrap()),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -124,7 +124,7 @@ pub float -> ValueAndSpan =
start:#position f:$( frac_exp / exp / frac ) end:#position {
ValueAndSpan {
inner: SpannedValue::Float(OrderedFloat(f.parse::<f64>().unwrap())),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -138,7 +138,7 @@ pub text -> ValueAndSpan =
start:#position "\"" t:$( char* ) "\"" end:#position {
ValueAndSpan {
inner: SpannedValue::Text(t.to_string()),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -164,7 +164,7 @@ pub symbol -> ValueAndSpan =
end:#position {
ValueAndSpan {
inner: SpannedValue::from_symbol(ns, n),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -176,7 +176,7 @@ pub keyword -> ValueAndSpan =
end:#position {
ValueAndSpan {
inner: SpannedValue::from_keyword(ns, n),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -184,7 +184,7 @@ pub list -> ValueAndSpan =
start:#position "(" __ v:(value)* __ ")" end:#position {
ValueAndSpan {
inner: SpannedValue::List(LinkedList::from_iter(v)),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -192,7 +192,7 @@ pub vector -> ValueAndSpan =
start:#position "[" __ v:(value)* __ "]" end:#position {
ValueAndSpan {
inner: SpannedValue::Vector(v),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -200,7 +200,7 @@ pub set -> ValueAndSpan =
start:#position "#{" __ v:(value)* __ "}" end:#position {
ValueAndSpan {
inner: SpannedValue::Set(BTreeSet::from_iter(v)),
span: Span(start, end)
span: Span::new(start, end)
}
}
@ -213,7 +213,7 @@ pub map -> ValueAndSpan =
start:#position "{" __ v:(pair)* __ "}" end:#position {
ValueAndSpan {
inner: SpannedValue::Map(BTreeMap::from_iter(v)),
span: Span(start, end)
span: Span::new(start, end)
}
}

View file

@ -26,5 +26,5 @@ pub mod parse {
pub use num::BigInt;
pub use ordered_float::OrderedFloat;
pub use parse::ParseError;
pub use types::Value;
pub use types::{Span, SpannedValue, Value, ValueAndSpan};
pub use symbols::{Keyword, NamespacedKeyword, PlainSymbol, NamespacedSymbol};

View file

@ -66,8 +66,14 @@ pub enum SpannedValue {
}
/// Span represents the current offset (start, end) into the input string.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Span(pub usize, pub usize);
/// Offsets `(start, end)` into the input string, stored as a pair of `u32`s.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct Span(pub u32, pub u32);

impl Span {
    /// Build a `Span` from `usize` offsets.
    ///
    /// The offsets are narrowed with `as u32`, so values above `u32::MAX` are truncated.
    pub fn new(start: usize, end: usize) -> Span {
        let narrowed_start = start as u32;
        let narrowed_end = end as u32;
        Span(narrowed_start, narrowed_end)
    }
}
/// A wrapper type around `SpannedValue` and `Span`, representing some EDN value
/// and the parsing offset (start, end) in the original EDN string.
@ -77,6 +83,40 @@ pub struct ValueAndSpan {
pub span: Span,
}
impl ValueAndSpan {
    /// Pair `spanned_value` with `span`.
    ///
    /// `span` may be a `Span` or an `Option<Span>`; when it converts to `None` the value is
    /// given the placeholder span `Span(0, 0)`.
    pub fn new<I>(spanned_value: SpannedValue, span: I) -> ValueAndSpan where I: Into<Option<Span>> {
        let span = match span.into() {
            Some(given) => given,
            None => Span(0, 0), // TODO: consider if this has implications.
        };
        ValueAndSpan {
            inner: spanned_value,
            span: span,
        }
    }

    /// Return `Some(self)` when the wrapped value reports `is_atom()`, otherwise `None`.
    pub fn into_atom(self) -> Option<ValueAndSpan> {
        if !self.inner.is_atom() {
            return None;
        }
        Some(self)
    }

    /// Consume `self` and delegate to `SpannedValue::into_text` on the wrapped value.
    pub fn into_text(self) -> Option<String> {
        let inner = self.inner;
        inner.into_text()
    }
}
impl Value {
    /// For debug use only!
    ///
    /// But right now, it's used in the bootstrapper.  We'll fix that soon.
    ///
    /// Round-trips `self` through its pretty-printed text (width 120) and the EDN parser to
    /// obtain a `ValueAndSpan`.
    ///
    /// # Panics
    ///
    /// Panics if pretty-printing fails, if the printed text does not parse, or if the parsed
    /// value (with spans dropped) differs from `self`.
    pub fn with_spans(self) -> ValueAndSpan {
        use ::parse;

        let pretty = self.to_pretty(120).unwrap();
        let reparsed = parse::value(&pretty).unwrap();
        // Sanity check: the round trip must be lossless.
        assert_eq!(self, reparsed.clone().without_spans());
        reparsed
    }
}
impl From<SpannedValue> for Value {
fn from(src: SpannedValue) -> Value {
match src {

View file

@ -11,8 +11,20 @@
extern crate combine;
extern crate edn;
use combine::ParseResult;
use combine::combinator::{Expected, FnParser};
use combine::{
ParseResult,
};
use combine::combinator::{
Expected,
FnParser,
};
pub mod log;
pub mod value_and_span;
pub use log::{
LogParsing,
};
/// A type definition for a function parser that either parses an `O` from an input stream of type
/// `I`, or fails with an "expected" failure.
@ -25,10 +37,10 @@ pub type ResultParser<O, I> = Expected<FnParser<I, fn(I) -> ParseResult<O, I>>>;
/// parser function against input and expecting a certain result.
#[macro_export]
macro_rules! assert_parses_to {
( $parser: path, $input: expr, $expected: expr ) => {{
( $parser: expr, $input: expr, $expected: expr ) => {{
let mut par = $parser();
let result = par.parse(&$input[..]);
assert_eq!(result, Ok(($expected, &[][..])));
let result = par.parse($input.with_spans().into_atom_stream()).map(|x| x.0); // TODO: check remainder of stream.
assert_eq!(result, Ok($expected));
}}
}
@ -82,6 +94,20 @@ macro_rules! def_parser_fn {
}
}
/// `def_parser!` adds an associated function `$name` to the type `$parser`.
///
/// The generated function takes no arguments and returns a
/// `ResultParser<$result_type, value_and_span::Stream>`.  `$body` must be a block that evaluates
/// to a parser; it is re-evaluated on every invocation of the generated parser, run with
/// `parse_lazy` against the input stream, and the result converted into a `ParseResult`.  The
/// parser is labelled with the stringified `$name` for "expected" error messages.
///
/// The expansion refers to `ResultParser`, `ParseResult` and `parser` without qualification, so
/// those names must be in scope wherever the macro is invoked.
#[macro_export]
macro_rules! def_parser {
( $parser: ident, $name: ident, $result_type: ty, $body: block ) => {
impl $parser {
fn $name() -> ResultParser<$result_type, $crate::value_and_span::Stream> {
// A nested `fn` (not a closure) so that it can be coerced to a plain function pointer below.
fn inner(input: $crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream> {
$body.parse_lazy(input).into()
}
parser(inner as fn($crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream>).expected(stringify!($name))
}
}
}
}
/// `def_value_parser_fn` is a short-cut to `def_parser_fn` with the input type
/// being `edn::Value`.
#[macro_export]

87
parser-utils/src/log.rs Normal file
View file

@ -0,0 +1,87 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::io::Write;
use combine::{
ParseError,
Parser,
ParseResult,
Stream,
};
/// `println!`, but writing to stderr.
///
/// Keeps stdout clean, which matters when running tests under Emacs: it parses the test suite's
/// output to format errors and gets confused when user output is interleaved into stdout.
///
/// Requires `std::io::Write` to be in scope at the call site.  Panics with
/// "failed printing to stderr" if the write fails.
///
/// Cribbed from http://stackoverflow.com/a/27590832.
macro_rules! println_stderr {
    ($($arg:tt)*) => {{
        writeln!(&mut ::std::io::stderr(), $($arg)*).expect("failed printing to stderr");
    }};
}
/// A parser wrapper that logs the outcome of every `parse_stream` call to stderr.
///
/// `Log(parser, label)` behaves exactly like `parser`; `label` is printed (via `Debug`) in front
/// of each log line so that nested parsers can be told apart.
#[derive(Clone)]
pub struct Log<P, T>(P, T)
    where P: Parser,
          T: ::std::fmt::Debug;

impl<I, P, T> Parser for Log<P, T>
    where I: Stream,
          I::Item: ::std::fmt::Debug,
          P: Parser<Input = I>,
          P::Output: ::std::fmt::Debug,
          T: ::std::fmt::Debug,
{
    type Input = I;
    type Output = P::Output;

    /// Run the wrapped parser and print one line describing the first input item (if any) and
    /// whether parsing succeeded.  The wrapped parser's result is returned unchanged.
    fn parse_stream(&mut self, input: I) -> ParseResult<Self::Output, I> {
        // Peek at the head on a clone so that logging never consumes the real input.
        let head = input.clone().uncons().ok();
        // `input` is not needed afterwards, so hand it over instead of cloning it again.
        let result = self.0.parse_stream(input);
        match result {
            Ok((ref value, _)) => println_stderr!("{:?}: [{:?} ...] => Ok({:?})", self.1, head, value),
            Err(_) => println_stderr!("{:?}: [{:?} ...] => Err(_)", self.1, head),
        }
        result
    }

    /// Error reporting is delegated to the wrapped parser.
    fn add_error(&mut self, errors: &mut ParseError<Self::Input>) {
        self.0.add_error(errors);
    }
}
/// Wrap parser `p` in a `Log` that tags its log output with `msg`.
#[inline(always)]
pub fn log<P: Parser, T: ::std::fmt::Debug>(p: P, msg: T) -> Log<P, T> {
    let logged = Log(p, msg);
    logged
}
/// We need a trait to define `Parser.log` and have it live outside of the `combine` crate.
pub trait LogParsing: Parser + Sized {
fn log<T>(self, msg: T) -> Log<Self, T>
where Self: Sized,
T: ::std::fmt::Debug;
}
impl<P> LogParsing for P
where P: Parser,
{
fn log<T>(self, msg: T) -> Log<Self, T>
where T: ::std::fmt::Debug,
{
log(self, msg)
}
}

View file

@ -0,0 +1,461 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std;
use std::fmt::{
Debug,
Display,
Formatter,
};
use std::cmp::Ordering;
use combine::{
ConsumedResult,
ParseError,
Parser,
ParseResult,
StreamOnce,
many,
many1,
parser,
satisfy,
satisfy_map,
};
use combine::primitives; // To not shadow Error.
use combine::primitives::{
Consumed,
FastResult,
};
use combine::combinator::{
Expected,
FnParser,
};
use edn;
/// Newtype around `edn::Span` that gives spans the ordering `combine` needs for stream
/// positions.
///
/// Only the first component of the span (the start offset) takes part in comparisons, so two
/// positions with the same start but different ends compare equal.
#[derive(Clone, Copy, Debug)]
pub struct SpanPosition(edn::Span);

impl Display for SpanPosition {
    /// Formats by delegating to the wrapped span's `fmt`.
    fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
        let span = &self.0;
        span.fmt(f)
    }
}

impl PartialEq for SpanPosition {
    /// Equality is defined through `Ord`, keeping the two consistent.
    fn eq(&self, other: &Self) -> bool {
        match self.cmp(other) {
            Ordering::Equal => true,
            _ => false,
        }
    }
}

impl Eq for SpanPosition { }

impl PartialOrd for SpanPosition {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = self.cmp(other);
        Some(ordering)
    }
}

impl Ord for SpanPosition {
    /// Orders by the span's start offset only.
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs_start = (self.0).0;
        let rhs_start = (other.0).0;
        lhs_start.cmp(&rhs_start)
    }
}
/// An iterator specifically for iterating `edn::ValueAndSpan` instances in various ways.
///
/// Enumerating each iteration type allows us to have a single `combine::Stream` implementation
/// yielding `ValueAndSpan` items, which allows us to yield uniform `combine::ParseError` types from
/// disparate parsers.
#[derive(Clone)]
pub enum IntoIter {
    /// Yields nothing.
    Empty(std::iter::Empty<edn::ValueAndSpan>),
    /// Yields exactly one value.
    Atom(std::iter::Once<edn::ValueAndSpan>),
    /// Yields the elements of a vector, in order.
    Vector(std::vec::IntoIter<edn::ValueAndSpan>),
    /// Yields the elements of a list, in order.
    List(std::collections::linked_list::IntoIter<edn::ValueAndSpan>),
    /// Iterates via a single `flat_map` [k1, v1, k2, v2, ...].
    Map(std::vec::IntoIter<edn::ValueAndSpan>),
    // TODO: Support Set and Map more naturally.  This is significantly more work because the
    // existing BTreeSet and BTreeMap iterators do not implement Clone, and implementing Clone for
    // them is involved.  Since we don't really need to parse sets and maps at this time, this will
    // do for now.
}

impl Iterator for IntoIter {
    type Item = edn::ValueAndSpan;

    /// Advance whichever underlying iterator this variant wraps.
    fn next(&mut self) -> Option<Self::Item> {
        match *self {
            IntoIter::Empty(ref mut i) => i.next(),
            IntoIter::Atom(ref mut i) => i.next(),
            IntoIter::Vector(ref mut i) => i.next(),
            IntoIter::List(ref mut i) => i.next(),
            IntoIter::Map(ref mut i) => i.next(),
        }
    }

    /// Forward the wrapped iterator's size hint so that consumers such as `collect` can
    /// pre-allocate instead of falling back to the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match *self {
            IntoIter::Empty(ref i) => i.size_hint(),
            IntoIter::Atom(ref i) => i.size_hint(),
            IntoIter::Vector(ref i) => i.size_hint(),
            IntoIter::List(ref i) => i.size_hint(),
            IntoIter::Map(ref i) => i.size_hint(),
        }
    }
}
/// A single `combine::Stream` implementation iterating `edn::ValueAndSpan` instances. Equivalent
/// to `combine::IteratorStream` as produced by `combine::from_iter`, but specialized to
/// `edn::ValueAndSpan`.
///
/// Field 0 is the iterator over the remaining items; field 1 is the current position, which is
/// what `position()` reports.
#[derive(Clone)]
pub struct Stream(IntoIter, SpanPosition);
/// Things specific to parsing with `combine` and our `Stream` that need a trait to live outside of
/// the `edn` crate.
pub trait Item: Clone + PartialEq + Sized {
    /// Position could be specialized to `SpanPosition`.
    type Position: Clone + Ord + std::fmt::Display;

    /// A slight generalization of `combine::Positioner` that allows to set the position based on
    /// the `edn::ValueAndSpan` being iterated.
    fn start(&self) -> Self::Position;

    /// Overwrite `position` with the position of this item.
    // The parameter is named: anonymous trait-method parameters are deprecated.
    fn update_position(&self, position: &mut Self::Position);

    /// Consume the item and iterate its children.
    fn into_child_stream_iter(self) -> IntoIter;

    /// Consume the item and produce a `Stream` over its children.
    fn into_child_stream(self) -> Stream;

    /// Consume the item and iterate just the item itself.
    fn into_atom_stream_iter(self) -> IntoIter;

    /// Consume the item and produce a `Stream` yielding just the item itself.
    fn into_atom_stream(self) -> Stream;
}
impl Item for edn::ValueAndSpan {
    type Position = SpanPosition;

    /// The position of a value is its own span.
    fn start(&self) -> Self::Position {
        // `edn::Span` is `Copy`; no explicit clone is needed.
        SpanPosition(self.span)
    }

    /// Replace `position` with this value's span.
    fn update_position(&self, position: &mut Self::Position) {
        *position = SpanPosition(self.span)
    }

    /// Iterate the children of a vector, list or map.  Map entries are flattened into the
    /// sequence `[k1, v1, k2, v2, ...]`.  Every other kind of value yields nothing.
    fn into_child_stream_iter(self) -> IntoIter {
        match self.inner {
            edn::SpannedValue::Vector(values) => IntoIter::Vector(values.into_iter()),
            edn::SpannedValue::List(values) => IntoIter::List(values.into_iter()),
            // Parsing pairs with `combine` is tricky; parsing sequences is easy.
            edn::SpannedValue::Map(map) => {
                // The final length is known up front, so allocate once.
                let mut flattened = Vec::with_capacity(2 * map.len());
                for (key, value) in map {
                    flattened.push(key);
                    flattened.push(value);
                }
                IntoIter::Map(flattened.into_iter())
            },
            _ => IntoIter::Empty(std::iter::empty()),
        }
    }

    /// A stream over the children, initially positioned at this value's span.
    fn into_child_stream(self) -> Stream {
        // Copy the span out before `self` is consumed.
        let span = self.span;
        Stream(self.into_child_stream_iter(), SpanPosition(span))
    }

    /// Iterate exactly one item: the value itself.
    fn into_atom_stream_iter(self) -> IntoIter {
        IntoIter::Atom(std::iter::once(self))
    }

    /// A one-item stream yielding the value itself, initially positioned at its span.
    fn into_atom_stream(self) -> Stream {
        let span = self.span;
        Stream(self.into_atom_stream_iter(), SpanPosition(span))
    }
}
/// `OfExactly` and `of_exactly` allow us to express nested parsers naturally.
///
/// For example, `vector().of_exactly(many(list()))` parses a vector-of-lists, like [(1 2) (:a :b) ("test") ()].
///
/// The "outer" parser `P` and the "nested" parser `N` must be compatible: `P` must produce an
/// output `edn::ValueAndSpan` which can itself be turned into a stream of child elements; and `N`
/// must accept the resulting input `Stream`. This compatibility allows us to lift errors from the
/// nested parser to the outer parser, which is part of what has made parsing `&'a [edn::Value]`
/// difficult.
///
/// "Exactly" means the nested parser must exhaust the child stream: if any child is left over
/// after the nested parser succeeds, the whole parse fails.
#[derive(Clone)]
pub struct OfExactly<P, N>(P, N);
impl<P, N, O> Parser for OfExactly<P, N>
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>,
N: Parser<Input=Stream, Output=O>,
{
type Input = P::Input;
type Output = O;
#[inline]
fn parse_lazy(&mut self, input: Self::Input) -> ConsumedResult<Self::Output, Self::Input> {
use self::FastResult::*;
// Run the outer parser first; its output is the container whose children feed the nested
// parser.  The two success arms differ only in whether the outer parser consumed input.
match self.0.parse_lazy(input) {
ConsumedOk((outer_value, outer_input)) => {
match self.1.parse_lazy(outer_value.into_child_stream()) {
ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => {
// The nested parser succeeded; now require that the child stream is exhausted.
match inner_input.uncons() {
Err(ref err) if *err == primitives::Error::end_of_input() => ConsumedOk((inner_value, outer_input)),
// NOTE(review): leftover children are reported as `EmptyErr` even though the outer
// parser consumed input, whereas nested failures below become `ConsumedErr` --
// confirm this asymmetry is intended.
_ => EmptyErr(ParseError::empty(inner_input.position())),
}
},
// TODO: Improve the error output to reference the nested value (or span) in
// some way. This seems surprisingly difficult to do, so we just surface the
// inner error message right now. See also the comment below.
EmptyErr(e) | ConsumedErr(e) => ConsumedErr(e),
}
},
EmptyOk((outer_value, outer_input)) => {
match self.1.parse_lazy(outer_value.into_child_stream()) {
ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => {
// Same exhaustion check as above, but success and failure are both reported as "empty".
match inner_input.uncons() {
Err(ref err) if *err == primitives::Error::end_of_input() => EmptyOk((inner_value, outer_input)),
_ => EmptyErr(ParseError::empty(inner_input.position())),
}
},
// TODO: Improve the error output. See the comment above.
EmptyErr(e) | ConsumedErr(e) => EmptyErr(e),
}
},
// Outer failures pass through unchanged; the nested parser is never run.
ConsumedErr(e) => ConsumedErr(e),
EmptyErr(e) => EmptyErr(e),
}
}
// Only the outer parser contributes "expected" information.
fn add_error(&mut self, errors: &mut ParseError<Self::Input>) {
self.0.add_error(errors);
}
}
/// Free-function form of `p.of_exactly(n)`: pair the outer parser `p` with the nested parser `n`.
#[inline(always)]
pub fn of_exactly<P, N, O>(p: P, n: N) -> OfExactly<P, N>
    where P: Parser<Input=Stream, Output=edn::ValueAndSpan>,
          N: Parser<Input=Stream, Output=O>,
{
    let nested = OfExactly(p, n);
    nested
}

/// We need a trait to define `Parser.of_exactly` and have it live outside of the `combine` crate.
pub trait OfExactlyParsing: Parser + Sized {
    /// Apply `n` to the children of the value produced by `self`.
    fn of_exactly<N, O>(self, n: N) -> OfExactly<Self, N>
        where Self: Sized,
              N: Parser<Input = Self::Input, Output=O>;
}

/// Available on every parser over `Stream` that outputs an `edn::ValueAndSpan`.
impl<P> OfExactlyParsing for P
    where P: Parser<Input=Stream, Output=edn::ValueAndSpan>
{
    fn of_exactly<N, O>(self, n: N) -> OfExactly<P, N>
        where N: Parser<Input = Self::Input, Output=O>
    {
        OfExactly(self, n)
    }
}
/// Equivalent to `combine::IteratorStream`.
impl StreamOnce for Stream
{
type Item = edn::ValueAndSpan;
type Range = edn::ValueAndSpan;
type Position = SpanPosition;
#[inline]
fn uncons(&mut self) -> std::result::Result<Self::Item, primitives::Error<Self::Item, Self::Item>> {
match self.0.next() {
Some(x) => {
x.update_position(&mut self.1);
Ok(x)
},
None => Err(primitives::Error::end_of_input()),
}
}
#[inline(always)]
fn position(&self) -> Self::Position {
self.1.clone()
}
}
/// Shorthands, just enough to convert the `mentat_db` crate for now.  Written using `Box` for now:
/// it's simple and we can address allocation issues if and when they surface.
///
/// Matches a single vector value.
pub fn vector() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
    let is_vector = |candidate: edn::ValueAndSpan| candidate.inner.is_vector();
    satisfy(is_vector).boxed()
}

/// Matches a single list value.
pub fn list() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
    let is_list = |candidate: edn::ValueAndSpan| candidate.inner.is_list();
    satisfy(is_list).boxed()
}

/// Matches a single map value.
pub fn map() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
    let is_map = |candidate: edn::ValueAndSpan| candidate.inner.is_map();
    satisfy(is_map).boxed()
}

/// Matches a single value that is either a list or a vector.
pub fn seq() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
    let is_seq = |candidate: edn::ValueAndSpan| {
        candidate.inner.is_list() || candidate.inner.is_vector()
    };
    satisfy(is_seq).boxed()
}

/// Matches a single integer value and outputs it as an `i64`.
pub fn integer() -> Box<Parser<Input=Stream, Output=i64>> {
    let to_integer = |candidate: edn::ValueAndSpan| candidate.inner.as_integer();
    satisfy_map(to_integer).boxed()
}

/// Matches a single namespaced keyword and outputs a clone of it.
pub fn namespaced_keyword() -> Box<Parser<Input=Stream, Output=edn::NamespacedKeyword>> {
    let to_keyword = |candidate: edn::ValueAndSpan| {
        candidate.inner.as_namespaced_keyword().cloned()
    };
    satisfy_map(to_keyword).boxed()
}
/// Like `combine::token()`, but compare an `edn::Value` to an `edn::ValueAndSpan`.
///
/// Matches a single stream item whose span-less form equals `value`.
pub fn value(value: edn::Value) -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
    // TODO: make this comparison faster.  Right now, we drop all the spans; if we walked the value
    // trees together, we could avoid creating garbage.
    let matches_value = move |candidate: edn::ValueAndSpan| {
        let without_spans: edn::Value = candidate.inner.into();
        value == without_spans
    };
    satisfy(matches_value).boxed()
}
// Parser function behind `keyword_map()`.
//
// Expects the next stream item to be a vector made of "runs" -- a keyword followed by one or
// more non-keyword values -- and outputs a map from each keyword to a vector of the values that
// followed it.  Fails if the vector does not consist solely of such runs or if a keyword occurs
// more than once.
fn keyword_map_(input: Stream) -> ParseResult<edn::ValueAndSpan, Stream>
{
// One run is a keyword followed by one or more non-keywords.
let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()),
many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword()))
.map(|vs: Vec<edn::ValueAndSpan>| {
// TODO: extract "spanning".
// `many1` yields at least one element, so `first()`/`last()` cannot be `None` here.
// The synthesized vector spans from the start of the first value to the end of the last.
let beg = vs.first().unwrap().span.0;
let end = vs.last().unwrap().span.1;
edn::ValueAndSpan {
inner: edn::SpannedValue::Vector(vs),
span: edn::Span(beg, end),
}
}));
// `of_exactly` makes the runs account for every child of the vector.
let mut runs = vector().of_exactly(many::<Vec<_>, _>(run));
let (data, input) = try!(runs.parse_lazy(input).into());
let mut m: std::collections::BTreeMap<edn::ValueAndSpan, edn::ValueAndSpan> = std::collections::BTreeMap::default();
for (k, vs) in data {
// `insert` returns the previous entry, so `Some` means the keyword was already seen.
if m.insert(k, vs).is_some() {
// TODO: improve this message.
return Err(Consumed::Empty(ParseError::from_errors(input.into_inner().position(), Vec::new())))
}
}
let map = edn::ValueAndSpan {
inner: edn::SpannedValue::Map(m),
// Placeholder: the resulting map does not carry the span of the source vector.
span: edn::Span(0, 0), // TODO: fix this.
};
Ok((map, input))
}
/// Turn a vector of keywords and non-keyword values into a map.  As an example, turn
///
/// ```edn
/// [:keyword1 value1 value2 ... :keyword2 value3 value4 ...]
/// ```
///
/// into
///
/// ```edn
/// {:keyword1 [value1 value2 ...] :keyword2 [value3 value4 ...]}
/// ```
///
/// Failures are reported as expecting a "keyword map".
pub fn keyword_map() -> Expected<FnParser<Stream, fn(Stream) -> ParseResult<edn::ValueAndSpan, Stream>>>
{
    // The `as` works around https://github.com/rust-lang/rust/issues/20178.
    let parse_fn = keyword_map_ as fn(Stream) -> ParseResult<edn::ValueAndSpan, Stream>;
    parser(parse_fn).expected("keyword map")
}
/// Define `$parser::$name()` as a parser matching a `PlainSymbol` value whose name is `$input`.
///
/// We do this rather than using `combine::token` so that we don't need to allocate a new `String`
/// inside a `PlainSymbol` inside a `SpannedValue` inside a `ValueAndSpan` just to match input.
///
/// Expands through `def_parser!`; `satisfy` and `edn` must be in scope at the call site.
#[macro_export]
macro_rules! def_matches_plain_symbol {
    ( $parser: ident, $name: ident, $input: expr ) => {
        def_parser!($parser, $name, edn::ValueAndSpan, {
            satisfy(|v: edn::ValueAndSpan| {
                if let edn::SpannedValue::PlainSymbol(ref s) = v.inner {
                    s.0.as_str() == $input
                } else {
                    false
                }
            })
        });
    }
}
/// Define `$parser::$name()` as a parser matching a `Keyword` value whose name is `$input`.
///
/// We do this rather than using `combine::token` to save allocations.
///
/// Expands through `def_parser!`; `satisfy` and `edn` must be in scope at the call site.
#[macro_export]
macro_rules! def_matches_keyword {
    ( $parser: ident, $name: ident, $input: expr ) => {
        def_parser!($parser, $name, edn::ValueAndSpan, {
            satisfy(|v: edn::ValueAndSpan| {
                if let edn::SpannedValue::Keyword(ref s) = v.inner {
                    s.0.as_str() == $input
                } else {
                    false
                }
            })
        });
    }
}
/// Define `$parser::$name()` as a parser matching a `NamespacedKeyword` value whose namespace is
/// `$input_namespace` and whose name is `$input_name`.
///
/// We do this rather than using `combine::token` to save allocations.
///
/// Expands through `def_parser!`; `satisfy` and `edn` must be in scope at the call site.
#[macro_export]
macro_rules! def_matches_namespaced_keyword {
    ( $parser: ident, $name: ident, $input_namespace: expr, $input_name: expr ) => {
        def_parser!($parser, $name, edn::ValueAndSpan, {
            satisfy(|v: edn::ValueAndSpan| {
                if let edn::SpannedValue::NamespacedKeyword(ref s) = v.inner {
                    s.namespace.as_str() == $input_namespace && s.name.as_str() == $input_name
                } else {
                    false
                }
            })
        });
    }
}
// Unit tests for `keyword_map()`.
#[cfg(test)]
mod tests {
use combine::{eof};
use super::*;
/// Take a string `input` and a string `expected` and ensure that `input` parses to an
/// `edn::Value` keyword map equivalent to the `edn::Value` that `expected` parses to.
///
/// `expected` is an `Option<&str>`: `None` asserts that parsing `input` as a keyword map fails.
macro_rules! assert_keyword_map_eq {
( $input: expr, $expected: expr ) => {{
let input = edn::parse::value($input).expect("to be able to parse input EDN");
let expected = $expected.map(|e| {
edn::parse::value(e).expect("to be able to parse expected EDN").without_spans()
});
// Spans are dropped before comparing; `skip(eof())` requires the whole stream to be consumed.
let mut par = keyword_map().map(|x| x.without_spans()).skip(eof());
let result = par.parse(input.into_atom_stream()).map(|x| x.0);
assert_eq!(result.ok(), expected);
}}
}
#[test]
fn test_keyword_map() {
assert_keyword_map_eq!(
"[:foo 1 2 3 :bar 4]",
Some("{:foo [1 2 3] :bar [4]}"));
// Trailing keywords aren't allowed.
assert_keyword_map_eq!(
"[:foo]",
None);
assert_keyword_map_eq!(
"[:foo 2 :bar]",
None);
// Duplicate keywords aren't allowed.
assert_keyword_map_eq!(
"[:foo 2 :foo 1]",
None);
// Starting with anything but a keyword isn't allowed.
assert_keyword_map_eq!(
"[2 :foo 1]",
None);
// Consecutive keywords aren't allowed.
assert_keyword_map_eq!(
"[:foo :bar 1]",
None);
// Empty lists return an empty map.
assert_keyword_map_eq!(
"[]",
Some("{}"));
}
}

View file

@ -1,261 +0,0 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//! This module defines the interface and implementation for parsing an EDN
//! input into a structured Datalog query.
//!
//! The query types are defined in the `query` crate, because they
//! are shared between the parser (EDN -> query), the translator
//! (query -> SQL), and the executor (query, SQL -> running code).
//!
//! The query input can be in two forms: a 'flat' human-oriented
//! sequence:
//!
//! ```clojure
//! [:find ?y :in $ ?x :where [?x :foaf/knows ?y]]
//! ```
//!
//! or a more programmatically generable map:
//!
//! ```clojure
//! {:find [?y]
//!  :in [$]
//!  :where [[?x :foaf/knows ?y]]}
//! ```
//!
//! We parse by expanding the array format into four parts, treating them as the four
//! parts of the map.
extern crate edn;
extern crate mentat_parser_utils;
extern crate mentat_query;
use std::collections::BTreeMap;
use self::mentat_query::{
FindQuery,
FnArg,
FromValue,
Predicate,
PredicateFn,
SrcVar,
Variable,
};
use self::mentat_parser_utils::ValueParseError;
use super::parse::{
ErrorKind,
QueryParseResult,
Result,
clause_seq_to_patterns,
};
use super::util::vec_to_keyword_map;
/// Convert every EDN value in `vals` into a `Variable` via `Variable::from_value`.
///
/// Returns the variables in input order.  The first value that is not a variable aborts the
/// conversion with a `NotAVariableError` carrying a clone of that value.
fn values_to_variables(vals: &[edn::Value]) -> Result<Vec<Variable>> {
    let mut variables: Vec<Variable> = Vec::with_capacity(vals.len());
    for candidate in vals {
        match Variable::from_value(candidate) {
            Some(variable) => variables.push(variable),
            None => bail!(ErrorKind::NotAVariableError(candidate.clone())),
        }
    }
    Ok(variables)
}
/// Assemble a `FindQuery` from the four sections of a query.
///
/// * `find`: the `:find` section.  It must be an array of plain var symbols (?foo), pull
///   expressions, and aggregates.  For now only variables are supported, plus the annotations
///   that select the flavor of `:find`:
///
///   ```text
///   ?x ?y ?z  = FindRel
///   [?x ...]  = FindColl
///   ?x .      = FindScalar
///   [?x ?y ?z] = FindTuple
///   ```
/// * `ins`: the `:in` section -- sources ($), rules (%), and vars (?).  Currently ignored: only
///   the default source is supported, which is equivalent to `:in $`.
/// * `with`: the optional `:with` section, an array of variables.
/// * `wheres`: the `:where` section.
///
/// Errors surface in this order: `:with`, then `:where`, then `:find`.
// `ins` is accepted but not yet processed, hence the allow.
#[allow(unused_variables)]
fn parse_find_parts(find: &[edn::Value],
                    ins: Option<&[edn::Value]>,
                    with: Option<&[edn::Value]>,
                    wheres: &[edn::Value])
                    -> QueryParseResult {
    // TODO: process `ins`.
    let default_source = SrcVar::DefaultSrc;

    // :with is an array of variables.  This is simple, so we don't use a parser.
    let with_vars = match with {
        Some(vals) => values_to_variables(vals)?,
        None => vec![],
    };

    // :wheres is a whole datastructure.
    let where_clauses = clause_seq_to_patterns(wheres)?;

    let find_spec = super::parse::find_seq_to_find_spec(find);
    find_spec.map(|spec| FindQuery {
        find_spec: spec,
        default_source: default_source,
        with: with_vars,
        in_vars: vec![],       // TODO
        in_sources: vec![],    // TODO
        where_clauses: where_clauses,
    })
}
/// Parse a query given as a map from section keyword to that section's values.
///
/// `:find` and `:where` are required; a missing one yields a `MissingFieldError` naming the
/// keyword (`:find` is checked first).  `:in` and `:with` are optional.
fn parse_find_map(map: BTreeMap<edn::Keyword, Vec<edn::Value>>) -> QueryParseResult {
    // Eagerly awaiting `const fn`.
    let kw_find = edn::Keyword::new("find");
    let kw_in = edn::Keyword::new("in");
    let kw_with = edn::Keyword::new("with");
    let kw_where = edn::Keyword::new("where");

    let find = match map.get(&kw_find) {
        Some(find) => find,
        None => bail!(ErrorKind::MissingFieldError(kw_find)),
    };
    let wheres = match map.get(&kw_where) {
        Some(wheres) => wheres,
        None => bail!(ErrorKind::MissingFieldError(kw_where)),
    };

    let ins = map.get(&kw_in).map(|x| x.as_slice());
    let with = map.get(&kw_with).map(|x| x.as_slice());
    parse_find_parts(find, ins, with, wheres)
}
/// Build a query from an EDN map.
///
/// Every key must be a `Keyword` and every value must be a `Vector`. The first
/// entry that breaks this rule produces an `InvalidInputError` carrying the
/// offending key (if the key is not a keyword) or the offending value (if the
/// key is a keyword but the value is not a vector). The validated entries are
/// then handed to `parse_find_map`.
fn parse_find_edn_map(map: BTreeMap<edn::Value, edn::Value>) -> QueryParseResult {
    let mut sections = BTreeMap::new();
    // An empty input simply skips the loop and yields an empty section map.
    for entry in map {
        match entry {
            (edn::Value::Keyword(keyword), edn::Value::Vector(values)) => {
                sections.insert(keyword, values);
            },
            (edn::Value::Keyword(_), not_a_vector) => {
                bail!(ErrorKind::InvalidInputError(not_a_vector))
            },
            (not_a_keyword, _) => {
                bail!(ErrorKind::InvalidInputError(not_a_keyword))
            },
        }
    }
    parse_find_map(sections)
}
/// Parse a query from its textual EDN form.
///
/// The string is read with `edn::parse::value`; a failure there is propagated
/// to the caller. The span information is then stripped from the parsed value
/// and the result is passed to `parse_find`.
pub fn parse_find_string(string: &str) -> QueryParseResult {
    let spanned = edn::parse::value(string)?;
    let value = spanned.without_spans();
    parse_find(value)
}
/// Parse a query from an EDN value.
///
/// Two shapes are accepted:
///
/// * a map, which is handed to `parse_find_edn_map`;
/// * a vector that `vec_to_keyword_map` can turn into a keyword map, which is
///   handed to `parse_find_map`.
///
/// Anything else is returned to the caller inside an `InvalidInputError`.
pub fn parse_find(expr: edn::Value) -> QueryParseResult {
    match expr {
        edn::Value::Map(entries) => parse_find_edn_map(entries),
        edn::Value::Vector(items) => {
            match vec_to_keyword_map(&items) {
                Some(sections) => parse_find_map(sections),
                // The vector was moved out of `expr` by the outer match, so wrap it
                // back up to report the original input unchanged.
                None => bail!(ErrorKind::InvalidInputError(edn::Value::Vector(items))),
            }
        },
        other => bail!(ErrorKind::InvalidInputError(other)),
    }
}
#[cfg(test)]
mod test_parse {
    extern crate edn;

    use std::rc::Rc;

    use self::edn::{NamespacedKeyword, PlainSymbol};
    use self::edn::types::Value;
    use super::mentat_query::{
        Element,
        FindSpec,
        Pattern,
        PatternNonValuePlace,
        PatternValuePlace,
        SrcVar,
        Variable,
        WhereClause,
    };
    use super::*;

    // TODO: when #224 lands, fix to_keyword to be variadic.

    /// The where clause both tests expect for `[?x :foo/bar ?y]`.
    fn foo_bar_pattern() -> WhereClause {
        WhereClause::Pattern(Pattern {
            source: None,
            entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")),
            attribute: PatternNonValuePlace::Ident(Rc::new(NamespacedKeyword::new("foo", "bar"))),
            value: PatternValuePlace::Variable(Variable::from_valid_name("?y")),
            tx: PatternNonValuePlace::Placeholder,
        })
    }

    /// Parsing an in-memory vector query yields the expected find spec,
    /// default source and where clauses.
    #[test]
    fn test_parse_find() {
        // A vector holding nothing but `:find` must be rejected.
        let find_only = edn::Value::Vector(vec![Value::from_keyword(None, "find")]);
        assert!(parse_find(find_only).is_err());

        // [:find ?x ?y :where [?x :foo/bar ?y]]
        let clause = edn::Value::Vector(vec![Value::from_symbol(None, "?x"),
                                             Value::from_keyword("foo", "bar"),
                                             Value::from_symbol(None, "?y")]);
        let query = edn::Value::Vector(vec![Value::from_keyword(None, "find"),
                                            Value::from_symbol(None, "?x"),
                                            Value::from_symbol(None, "?y"),
                                            Value::from_keyword(None, "where"),
                                            clause]);
        let parsed = parse_find(query).unwrap();

        match parsed.find_spec {
            FindSpec::FindRel(elems) => {
                assert_eq!(2, elems.len());
                assert_eq!(vec![Element::Variable(Variable::from_valid_name("?x")),
                                Element::Variable(Variable::from_valid_name("?y"))],
                           elems);
            },
            _ => panic!("Expected FindRel."),
        }

        assert_eq!(SrcVar::DefaultSrc, parsed.default_source);
        assert_eq!(parsed.where_clauses, vec![foo_bar_pattern()]);
    }

    /// Parsing from text yields both the pattern and the `<` predicate clause.
    #[test]
    fn test_parse_predicate() {
        let input = "[:find ?x :where [?x :foo/bar ?y] [[< ?y 10]]]";
        let parsed = parse_find_string(input).unwrap();

        let less_than_ten = WhereClause::Pred(Predicate {
            operator: PlainSymbol::new("<"),
            args: vec![FnArg::Variable(Variable::from_valid_name("?y")),
                       FnArg::EntidOrInteger(10)],
        });
        assert_eq!(parsed.where_clauses, vec![foo_bar_pattern(), less_than_ten]);
    }
}

View file

@ -21,19 +21,12 @@ extern crate edn;
#[macro_use]
extern crate mentat_parser_utils;
mod util;
mod parse;
pub mod find;
pub use find::{
parse_find,
parse_find_string,
};
pub use parse::{
Error,
ErrorKind,
QueryParseResult,
Result,
ResultExt,
parse_find_string,
};

View file

@ -13,14 +13,27 @@ extern crate edn;
extern crate mentat_parser_utils;
extern crate mentat_query;
use self::combine::{eof, many, many1, optional, parser, satisfy_map, Parser, ParseResult, Stream};
use self::combine::combinator::{choice, try};
use std; // To refer to std::result::Result.
use self::combine::{eof, many, many1, optional, parser, satisfy, satisfy_map, Parser, ParseResult, Stream};
use self::combine::combinator::{choice, or, try};
use self::mentat_parser_utils::{
ResultParser,
ValueParseError,
};
use self::mentat_parser_utils::value_and_span::Stream as ValueStream;
use self::mentat_parser_utils::value_and_span::{
Item,
OfExactlyParsing,
keyword_map,
list,
map,
seq,
vector,
};
use self::mentat_query::{
Element,
FindQuery,
@ -50,17 +63,17 @@ error_chain! {
}
errors {
NotAVariableError(value: edn::Value) {
NotAVariableError(value: edn::ValueAndSpan) {
description("not a variable")
display("not a variable: '{}'", value)
}
FindParseError(e: ValueParseError) {
FindParseError(e: combine::ParseError<ValueStream>) {
description(":find parse error")
display(":find parse error")
}
WhereParseError(e: ValueParseError) {
WhereParseError(e: combine::ParseError<ValueStream>) {
description(":where parse error")
display(":where parse error")
}
@ -83,321 +96,282 @@ error_chain! {
}
}
pub type WhereParseResult = Result<Vec<WhereClause>>;
pub type FindParseResult = Result<FindSpec>;
pub type QueryParseResult = Result<FindQuery>;
pub struct Query;
pub struct Query<I>(::std::marker::PhantomData<fn(I) -> I>);
def_parser!(Query, variable, Variable, {
satisfy_map(Variable::from_value)
});
impl<I> Query<I>
    where I: Stream<Item = edn::Value>
{
    /// Reduce a parse result to just the parsed value.
    ///
    /// On success the first component of the result tuple is returned and the
    /// rest is discarded; on failure the error is dropped and `None` is returned.
    fn to_parsed_value<T>(r: ParseResult<T, I>) -> Option<T> {
        match r {
            Ok(parsed) => Some(parsed.0),
            Err(_) => None,
        }
    }
}
def_parser!(Query, source_var, SrcVar, {
satisfy_map(SrcVar::from_value)
});
// TODO: interning.
def_value_satisfy_parser_fn!(Query, variable, Variable, Variable::from_value);
def_value_satisfy_parser_fn!(Query, source_var, SrcVar, SrcVar::from_value);
def_value_satisfy_parser_fn!(Query, predicate_fn, PredicateFn, PredicateFn::from_value);
def_value_satisfy_parser_fn!(Query, fn_arg, FnArg, FnArg::from_value);
pub struct Where<I>(::std::marker::PhantomData<fn(I) -> I>);
def_value_satisfy_parser_fn!(Where,
pattern_value_place,
PatternValuePlace,
PatternValuePlace::from_value);
def_value_satisfy_parser_fn!(Where,
pattern_non_value_place,
PatternNonValuePlace,
PatternNonValuePlace::from_value);
/// Extract the items of a sequential EDN value.
///
/// Returns the elements of a `Vector` as-is, or the elements of a `List`
/// collected into a `Vec`. Any other value, or no value at all, gives `None`.
fn seq<T: Into<Option<edn::Value>>>(x: T) -> Option<Vec<edn::Value>> {
    let candidate: Option<edn::Value> = x.into();
    candidate.and_then(|value| {
        match value {
            edn::Value::Vector(items) => Some(items),
            edn::Value::List(items) => Some(items.into_iter().collect()),
            _ => None,
        }
    })
}
/// Take a vector Value wrapping an inner vector or list, and return the inner
/// items as a `Vec`.
///
/// Only the last element of the outer vector is inspected (it is popped off and
/// passed to `seq`); any elements before it are ignored. Returns `None` when
/// `x` is not a vector, when it is empty, or when its last element is neither a
/// vector nor a list.
fn unwrap_nested(x: edn::Value) -> Option<Vec<edn::Value>> {
    if let edn::Value::Vector(mut outer) = x {
        return seq(outer.pop());
    }
    None
}
def_value_parser_fn!(Where, and, (), input, {
matches_plain_symbol!("and", input)
def_parser!(Query, predicate_fn, PredicateFn, {
satisfy_map(PredicateFn::from_value)
});
def_value_parser_fn!(Where, or, (), input, {
matches_plain_symbol!("or", input)
def_parser!(Query, fn_arg, FnArg, {
satisfy_map(FnArg::from_value)
});
def_value_parser_fn!(Where, or_join, (), input, {
matches_plain_symbol!("or-join", input)
def_parser!(Query, arguments, Vec<FnArg>, {
(many::<Vec<FnArg>, _>(Query::fn_arg()))
});
def_value_parser_fn!(Where, rule_vars, Vec<Variable>, input, {
satisfy_map(|x: edn::Value| {
seq(x).and_then(|items| {
let mut p = many1(Query::variable()).skip(eof());
Query::to_parsed_value(p.parse_lazy(&items[..]).into())
})}).parse_stream(input)
pub struct Where;
def_parser!(Where, pattern_value_place, PatternValuePlace, {
satisfy_map(PatternValuePlace::from_value)
});
def_value_parser_fn!(Where, or_pattern_clause, OrWhereClause, input, {
Where::clause().map(|clause| OrWhereClause::Clause(clause)).parse_stream(input)
def_parser!(Where, pattern_non_value_place, PatternNonValuePlace, {
satisfy_map(PatternNonValuePlace::from_value)
});
def_value_parser_fn!(Where, or_and_clause, OrWhereClause, input, {
satisfy_map(|x: edn::Value| {
seq(x).and_then(|items| {
let mut p = Where::and()
.with(many1(Where::clause()))
.skip(eof())
.map(OrWhereClause::And);
let r: ParseResult<OrWhereClause, _> = p.parse_lazy(&items[..]).into();
Query::to_parsed_value(r)
})
}).parse_stream(input)
def_matches_plain_symbol!(Where, and, "and");
def_matches_plain_symbol!(Where, or, "or");
def_matches_plain_symbol!(Where, or_join, "or-join");
def_parser!(Where, rule_vars, Vec<Variable>, {
seq()
.of_exactly(many1(Query::variable()))
});
def_value_parser_fn!(Where, or_where_clause, OrWhereClause, input, {
choice([Where::or_pattern_clause(), Where::or_and_clause()]).parse_stream(input)
def_parser!(Where, or_pattern_clause, OrWhereClause, {
Where::clause().map(|clause| OrWhereClause::Clause(clause))
});
def_value_parser_fn!(Where, or_clause, WhereClause, input, {
satisfy_map(|x: edn::Value| {
seq(x).and_then(|items| {
let mut p = Where::or()
.with(many1(Where::or_where_clause()))
.skip(eof())
.map(|clauses| {
WhereClause::OrJoin(
OrJoin {
unify_vars: UnifyVars::Implicit,
clauses: clauses,
})
});
let r: ParseResult<WhereClause, _> = p.parse_lazy(&items[..]).into();
Query::to_parsed_value(r)