Parse without copying; parse keyword maps using macros.

This is a big commit, but it breaks into two conceptual pieces.  The
first is to "parse without copying".  We replace a stream over an owned
collection of edn::ValueAndSpan with a stream over a borrowed
collection of &edn::ValueAndSpan references.  (Generally, this is
represented as an iterator over a slice, but it can be over other
things too.)  Cloning such iterators is constant time, which improves
on cloning an owned collection of edn::ValueAndSpan, which takes time
linear in the length of the collection plus additional time depending
on the complexity of the EDN values.

The second conceptual piece is to parse keyword maps using a special
parser and a macro to build the parser implementations.  Before, we
created a new edn::ValueAndSpan (wrapping an edn::SpannedValue::Map) to
represent a keyword map given in vector form; since we're working with
&edn::ValueAndSpan references now, we can't create an
&edn::ValueAndSpan reference with an appropriate lifetime.  Therefore
we generalize the concept of iteration slightly and turn keyword maps
in map form into linear iterators by flattening the vector values of
the map.  This is a potentially obscuring transformation, so we have to
take care to protect against some failure cases.  (See the comments and
the tests in the code.)

After these changes, parsing using `combine` is linear time (and
reasonably fast).
This commit is contained in:
Nick Alexander 2017-05-04 13:40:41 -07:00
parent 4fa57942d3
commit d1ac752de6
16 changed files with 669 additions and 435 deletions

View file

@ -61,3 +61,6 @@ path = "query-translator"
path = "tx-parser"
debug = true

View file

@ -251,6 +251,6 @@ pub fn bootstrap_entities() -> Vec<Entity> {
// Failure here is a coding error (since the inputs are fixed), not a runtime error.
// TODO: represent these bootstrap data errors rather than just panicking.
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(bootstrap_assertions.with_spans()).unwrap();
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(&bootstrap_assertions.with_spans()).unwrap();
return bootstrap_entities;

View file

@ -1154,7 +1154,7 @@ mod tests {
fn transact<I>(&mut self, transaction: I) -> Result<TxReport> where I: Borrow<str> {
// Failure to parse the transaction is a coding error, so we unwrap.
let assertions = edn::parse::value(transaction.borrow()).expect(format!("to be able to parse {} into EDN", transaction.borrow()).as_str());
let entities: Vec<_> = mentat_tx_parser::Tx::parse(assertions.clone()).expect(format!("to be able to parse {} into entities", assertions).as_str());
let entities: Vec<_> = mentat_tx_parser::Tx::parse(&assertions).expect(format!("to be able to parse {} into entities", assertions).as_str());
let details = {
// The block scopes the borrow of self.sqlite.

View file

@ -110,9 +110,21 @@ impl ValueAndSpan {
pub fn as_atom(&self) -> Option<&ValueAndSpan> {
if self.inner.is_atom() {
} else {
pub fn into_text(self) -> Option<String> {
pub fn as_text(&self) -> Option<&String> {
impl Value {

View file

@ -5,7 +5,8 @@ authors = ["Victor Porof <>", "Richard Newman <rnewman@mozilla
workspace = ".."
combine = "2.2.2"
combine = "2.3.2"
itertools = "0.5.9"
path = "../edn"

View file

@ -10,39 +10,7 @@
extern crate combine;
extern crate edn;
pub mod macros;
pub mod log;
pub mod value_and_span;
pub use log::{
/// `assert_parses_to!` simplifies some of the boilerplate around running a
/// parser function against input and expecting a certain result.
macro_rules! assert_parses_to {
( $parser: expr, $input: expr, $expected: expr ) => {{
let par = $parser();
let result = par.skip(eof()).parse($input.with_spans().into_atom_stream()).map(|x| x.0);
assert_eq!(result, Ok($expected));
/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function
/// against string input and expecting a certain result.
macro_rules! assert_edn_parses_to {
( $parser: expr, $input: expr, $expected: expr ) => {{
let par = $parser();
let input = edn::parse::value($input).expect("to be able to parse input as EDN");
let result = par.skip(eof()).parse(input.into_atom_stream()).map(|x| x.0);
assert_eq!(result, Ok($expected));
extern crate itertools;
/// A `ValueParseError` is a `combine::primitives::ParseError`-alike that implements the `Debug`,
/// `Display`, and `std::error::Error` traits. In addition, it doesn't capture references, making
@ -52,11 +20,29 @@ macro_rules! assert_edn_parses_to {
/// `Display`; rather than introducing a newtype like `DisplayVec`, we re-use `edn::Value::Vector`.
pub struct ValueParseError {
pub position: usize,
pub position: edn::Span,
// Think of this as `Vec<Error<edn::Value, DisplayVec<edn::Value>>>`; see above.
pub errors: Vec<combine::primitives::Error<edn::Value, edn::Value>>,
pub errors: Vec<combine::primitives::Error<edn::ValueAndSpan, edn::ValueAndSpan>>,
pub mod macros;
pub use macros::{
pub mod log;
pub mod value_and_span;
pub use value_and_span::{
pub use log::{
impl std::fmt::Debug for ValueParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
@ -68,7 +54,7 @@ impl std::fmt::Debug for ValueParseError {
impl std::fmt::Display for ValueParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
try!(writeln!(f, "Parse error at {}", self.position));
try!(writeln!(f, "Parse error at {:?}", self.position));
combine::primitives::Error::fmt_errors(&self.errors, f)
@ -79,49 +65,13 @@ impl std::error::Error for ValueParseError {
impl<'a> From<combine::primitives::ParseError<&'a [edn::Value]>> for ValueParseError {
fn from(e: combine::primitives::ParseError<&'a [edn::Value]>) -> ValueParseError {
impl<'a> From<combine::primitives::ParseError<Stream<'a>>> for ValueParseError {
fn from(e: combine::primitives::ParseError<Stream<'a>>) -> ValueParseError {
ValueParseError {
position: e.position,
errors: e.errors.into_iter().map(|e| e.map_range(|r| {
let mut v = Vec::new();
/// Allow to map the range types of combine::primitives::{Info, Error}.
trait MapRange<R, S> {
type Output;
fn map_range<F>(self, f: F) -> Self::Output where F: FnOnce(R) -> S;
impl<T, R, S> MapRange<R, S> for combine::primitives::Info<T, R> {
type Output = combine::primitives::Info<T, S>;
fn map_range<F>(self, f: F) -> combine::primitives::Info<T, S> where F: FnOnce(R) -> S {
use combine::primitives::Info::*;
match self {
Token(t) => Token(t),
Range(r) => Range(f(r)),
Owned(s) => Owned(s),
Borrowed(x) => Borrowed(x),
impl<T, R, S> MapRange<R, S> for combine::primitives::Error<T, R> {
type Output = combine::primitives::Error<T, S>;
fn map_range<F>(self, f: F) -> combine::primitives::Error<T, S> where F: FnOnce(R) -> S {
use combine::primitives::Error::*;
match self {
Unexpected(x) => Unexpected(x.map_range(f)),
Expected(x) => Expected(x.map_range(f)),
Message(x) => Message(x.map_range(f)),
Other(x) => Other(x),
position: e.position.0,
errors: e.errors.into_iter()
.map(|e| e.map_token(|t| t.clone()).map_range(|r| r.clone()))

View file

@ -36,6 +36,8 @@ use combine::combinator::{
/// Nothing about this is specific to the result type of the parser.
pub type ResultParser<O, I> = Expected<FnParser<I, fn(I) -> ParseResult<O, I>>>;
/// Newtype wrapper around a tuple of `(keyword, parser)` pairs, as constructed by
/// `keyword_map_of!`. `Parser` is implemented for the supported tuple arities by the
/// `keyword_map_parser!` macro.
pub struct KeywordMapParser<T>(pub T);
/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map`
/// body that matches a particular `Value` enum case, otherwise returning `None`.
@ -65,61 +67,71 @@ macro_rules! matches_plain_symbol {
/// Define an `impl` body for the `$parser` type. The body will contain a parser
/// function called `$name`, consuming a stream of `$item_type`s. The parser's
/// result type will be `$result_type`.
/// The provided `$body` will be evaluated with `$input` bound to the input stream.
/// `$body`, when run, should return a `ParseResult` of the appropriate result type.
macro_rules! def_parser_fn {
( $parser: ident, $name: ident, $item_type: ty, $result_type: ty, $input: ident, $body: block ) => {
impl<I> $parser<I> where I: Stream<Item = $item_type> {
fn $name() -> ResultParser<$result_type, I> {
fn inner<I: Stream<Item = $item_type>>($input: I) -> ParseResult<$result_type, I> {
parser(inner as fn(I) -> ParseResult<$result_type, I>).expected(stringify!($name))
macro_rules! def_parser {
( $parser: ident, $name: ident, $result_type: ty, $body: block ) => {
impl $parser {
fn $name() -> ResultParser<$result_type, $crate::value_and_span::Stream> {
fn inner(input: $crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream> {
impl<'p> $parser<'p> {
fn $name<'a>() -> ResultParser<$result_type, $crate::value_and_span::Stream<'a>> {
fn inner<'a>(input: $crate::value_and_span::Stream<'a>) -> ParseResult<$result_type, $crate::value_and_span::Stream<'a>> {
parser(inner as fn($crate::value_and_span::Stream) -> ParseResult<$result_type, $crate::value_and_span::Stream>).expected(stringify!($name))
parser(inner as fn($crate::value_and_span::Stream<'a>) -> ParseResult<$result_type, $crate::value_and_span::Stream<'a>>).expected(stringify!($name))
/// `def_value_parser_fn` is a short-cut to `def_parser_fn` with the input type
/// being `edn::Value`.
/// `assert_parses_to!` simplifies some of the boilerplate around running a
/// parser function against input and expecting a certain result.
macro_rules! def_value_parser_fn {
( $parser: ident, $name: ident, $result_type: ty, $input: ident, $body: block ) => {
def_parser_fn!($parser, $name, edn::Value, $result_type, $input, $body);
macro_rules! assert_parses_to {
( $parser: expr, $input: expr, $expected: expr ) => {{
let input = $input.with_spans();
let par = $parser();
let stream = input.atom_stream();
let result = par.skip(eof()).parse(stream).map(|x| x.0);
assert_eq!(result, Ok($expected));
/// `def_value_satisfy_parser_fn` is a short-cut to `def_parser_fn` with the input type
/// being `edn::Value` and the body being a call to `satisfy_map` with the given transformer.
/// In practice this allows you to simply pass a function that accepts an `&edn::Value` and
/// returns an `Option<$result_type>`: if a suitable value is at the front of the stream,
/// it will be converted and returned by the parser; otherwise, the parse will fail.
/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function
/// against string input and expecting a certain result.
macro_rules! def_value_satisfy_parser_fn {
( $parser: ident, $name: ident, $result_type: ty, $transformer: path ) => {
def_value_parser_fn!($parser, $name, $result_type, input, {
satisfy_map(|x: edn::Value| $transformer(&x)).parse_stream(input)
macro_rules! assert_edn_parses_to {
( $parser: expr, $input: expr, $expected: expr ) => {{
let input = edn::parse::value($input).expect("to be able to parse input as EDN");
let par = $parser();
let stream = input.atom_stream();
let result = par.skip(eof()).parse(stream).map(|x| x.0);
assert_eq!(result, Ok($expected));
/// `assert_parse_failure_contains!` simplifies running a parser function against string input and
/// expecting a certain failure. This is working around the complexity of pattern matching parse
/// errors that contain spans.
macro_rules! assert_parse_failure_contains {
( $parser: expr, $input: expr, $expected: expr ) => {{
let input = edn::parse::value($input).expect("to be able to parse input as EDN");
let par = $parser();
let stream = input.atom_stream();
let result = par.skip(eof()).parse(stream).map(|x| x.0).map_err(|e| -> ::ValueParseError { e.into() });
assert!(format!("{:?}", result).contains($expected), "Expected {:?} to contain {:?}", result, $expected);
macro_rules! keyword_map_of {
($(($keyword:expr, $value:expr)),+) => {{
let mut seen = std::collections::BTreeSet::default();
if !seen.insert($keyword) {
panic!("keyword map has repeated key: {}", stringify!($keyword));
KeywordMapParser(($(($keyword, $value)),+))

View file

@ -8,13 +8,15 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std;
use std::cmp::Ordering;
use std::fmt::{
use std::cmp::Ordering;
use combine::{
@ -22,15 +24,11 @@ use combine::{
use combine::primitives; // To not shadow Error.
use combine::primitives::{
use combine::combinator::{
@ -40,9 +38,13 @@ use combine::combinator::{
use edn;
use macros::{
/// A wrapper to let us order `edn::Span` in whatever way is appropriate for parsing with `combine`.
#[derive(Clone, Copy, Debug)]
pub struct SpanPosition(edn::Span);
pub struct SpanPosition(pub edn::Span);
impl Display for SpanPosition {
fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
@ -76,29 +78,37 @@ impl Ord for SpanPosition {
/// yielding `ValueAndSpan` items, which allows us to yield uniform `combine::ParseError` types from
/// disparate parsers.
pub enum IntoIter {
/// Iterates via a single `flat_map` [k1, v1, k2, v2, ...].
pub enum Iter<'a> {
Atom(std::iter::Once<&'a edn::ValueAndSpan>),
Vector(std::slice::Iter<'a, edn::ValueAndSpan>),
List(std::collections::linked_list::Iter<'a, edn::ValueAndSpan>),
/// Iterates a map {:k1 v1, :k2 v2, ...} as a single `flat_map` slice [k1, v1, k2, v2, ...].
Map(std::iter::FlatMap<std::collections::btree_map::Iter<'a, edn::ValueAndSpan, edn::ValueAndSpan>,
std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, std::iter::Once<&'a edn::ValueAndSpan>>,
fn((&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, std::iter::Once<&'a edn::ValueAndSpan>>>),
/// Iterates a map with vector values {:k1 [v11 v12 ...], :k2 [v21 v22 ...], ...} as a single
/// flattened map [k1, v11, v12, ..., k2, v21, v22, ...].
KeywordMap(std::iter::FlatMap<std::collections::btree_map::Iter<'a, edn::ValueAndSpan, edn::ValueAndSpan>,
std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, Box<Iter<'a>>>,
fn((&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, Box<Iter<'a>>>>),
// TODO: Support Set and Map more naturally. This is significantly more work because the
// existing BTreeSet and BTreeMap iterators do not implement Clone, and implementing Clone for
// them is involved. Since we don't really need to parse sets and maps at this time, this will
// do for now.
impl Iterator for IntoIter {
type Item = edn::ValueAndSpan;
impl<'a> Iterator for Iter<'a> {
type Item = &'a edn::ValueAndSpan;
fn next(&mut self) -> Option<Self::Item> {
match *self {
IntoIter::Empty(ref mut i) =>,
IntoIter::Atom(ref mut i) =>,
IntoIter::Vector(ref mut i) =>,
IntoIter::List(ref mut i) =>,
IntoIter::Map(ref mut i) =>,
Iter::Empty => None,
Iter::Atom(ref mut i) =>,
Iter::Vector(ref mut i) =>,
Iter::List(ref mut i) =>,
Iter::Map(ref mut i) =>,
Iter::KeywordMap(ref mut i) =>,
@ -107,11 +117,11 @@ impl Iterator for IntoIter {
/// to `combine::IteratorStream` as produced by `combine::from_iter`, but specialized to
/// `edn::ValueAndSpan`.
pub struct Stream(IntoIter, SpanPosition);
pub struct Stream<'a>(Iter<'a>, SpanPosition);
/// Things specific to parsing with `combine` and our `Stream` that need a trait to live outside of
/// the `edn` crate.
pub trait Item: Clone + PartialEq + Sized {
pub trait Item<'a>: Clone + PartialEq + Sized {
/// Position could be specialized to `SpanPosition`.
type Position: Clone + Ord + std::fmt::Display;
@ -120,13 +130,16 @@ pub trait Item: Clone + PartialEq + Sized {
fn start(&self) -> Self::Position;
fn update_position(&self, &mut Self::Position);
fn into_child_stream_iter(self) -> IntoIter;
fn into_child_stream(self) -> Stream;
fn into_atom_stream_iter(self) -> IntoIter;
fn into_atom_stream(self) -> Stream;
fn child_iter(&'a self) -> Iter<'a>;
fn child_stream(&'a self) -> Stream<'a>;
fn atom_iter(&'a self) -> Iter<'a>;
fn atom_stream(&'a self) -> Stream<'a>;
fn keyword_map_iter(&'a self) -> Iter<'a>;
fn keyword_map_stream(&'a self) -> Stream<'a>;
impl Item for edn::ValueAndSpan {
impl<'a> Item<'a> for edn::ValueAndSpan {
type Position = SpanPosition;
fn start(&self) -> Self::Position {
@ -137,28 +150,48 @@ impl Item for edn::ValueAndSpan {
*position = SpanPosition(self.span.clone())
fn into_child_stream_iter(self) -> IntoIter {
match self.inner {
edn::SpannedValue::Vector(values) => IntoIter::Vector(values.into_iter()),
edn::SpannedValue::List(values) => IntoIter::List(values.into_iter()),
// Parsing pairs with `combine` is tricky; parsing sequences is easy.
edn::SpannedValue::Map(map) => IntoIter::Map(map.into_iter().flat_map(|(a, v)| std::iter::once(a).chain(std::iter::once(v))).collect::<Vec<_>>().into_iter()),
_ => IntoIter::Empty(std::iter::empty()),
fn keyword_map_iter(&'a self) -> Iter<'a> {
fn flatten_k_vector<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, Box<Iter<'a>>> {
match self.inner.as_map() {
Some(ref map) => Iter::KeywordMap(map.iter().flat_map(flatten_k_vector)),
None => Iter::Empty
fn into_child_stream(self) -> Stream {
fn keyword_map_stream(&'a self) -> Stream<'a> {
let span = self.span.clone();
Stream(self.into_child_stream_iter(), SpanPosition(span))
Stream(self.keyword_map_iter(), SpanPosition(span))
fn into_atom_stream_iter(self) -> IntoIter {
fn child_iter(&'a self) -> Iter<'a> {
fn flatten_k_v<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> std::iter::Chain<std::iter::Once<&'a edn::ValueAndSpan>, std::iter::Once<&'a edn::ValueAndSpan>> {
match self.inner {
edn::SpannedValue::Vector(ref values) => Iter::Vector(values.iter()),
edn::SpannedValue::List(ref values) => Iter::List(values.iter()),
// Parsing pairs with `combine` is tricky; parsing sequences is easy.
edn::SpannedValue::Map(ref map) => Iter::Map(map.iter().flat_map(flatten_k_v)),
_ => Iter::Empty,
fn into_atom_stream(self) -> Stream {
fn child_stream(&'a self) -> Stream<'a> {
let span = self.span.clone();
Stream(self.into_atom_stream_iter(), SpanPosition(span))
Stream(self.child_iter(), SpanPosition(span))
fn atom_iter(&'a self) -> Iter<'a> {
fn atom_stream(&'a self) -> Stream<'a> {
let span = self.span.clone();
Stream(self.atom_iter(), SpanPosition(span))
@ -174,9 +207,26 @@ impl Item for edn::ValueAndSpan {
pub struct OfExactly<P, N>(P, N);
impl<P, N, O> Parser for OfExactly<P, N>
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>,
N: Parser<Input=Stream, Output=O>,
pub trait Streaming<'a> {
fn as_stream(self) -> Stream<'a>;
impl<'a> Streaming<'a> for &'a edn::ValueAndSpan {
fn as_stream(self) -> Stream<'a> {
impl<'a> Streaming<'a> for Stream<'a> {
fn as_stream(self) -> Stream<'a> {
impl<'a, P, N, M, O> Parser for OfExactly<P, N>
where P: Parser<Input=Stream<'a>, Output=M>,
N: Parser<Input=Stream<'a>, Output=O>,
M: 'a + Streaming<'a>,
type Input = P::Input;
type Output = O;
@ -186,7 +236,7 @@ impl<P, N, O> Parser for OfExactly<P, N>
match self.0.parse_lazy(input) {
ConsumedOk((outer_value, outer_input)) => {
match self.1.parse_lazy(outer_value.into_child_stream()) {
match self.1.parse_lazy(outer_value.as_stream()) {
ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => {
match inner_input.uncons() {
Err(ref err) if *err == primitives::Error::end_of_input() => ConsumedOk((inner_value, outer_input)),
@ -200,7 +250,7 @@ impl<P, N, O> Parser for OfExactly<P, N>
EmptyOk((outer_value, outer_input)) => {
match self.1.parse_lazy(outer_value.into_child_stream()) {
match self.1.parse_lazy(outer_value.as_stream()) {
ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => {
match inner_input.uncons() {
Err(ref err) if *err == primitives::Error::end_of_input() => EmptyOk((inner_value, outer_input)),
@ -222,9 +272,10 @@ impl<P, N, O> Parser for OfExactly<P, N>
pub fn of_exactly<P, N, O>(p: P, n: N) -> OfExactly<P, N>
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>,
N: Parser<Input=Stream, Output=O>,
pub fn of_exactly<'a, P, N, M, O>(p: P, n: N) -> OfExactly<P, N>
where P: Parser<Input=Stream<'a>, Output=M>,
N: Parser<Input=Stream<'a>, Output=O>,
M: 'a + Streaming<'a>,
OfExactly(p, n)
@ -236,8 +287,9 @@ pub trait OfExactlyParsing: Parser + Sized {
N: Parser<Input = Self::Input, Output=O>;
impl<P> OfExactlyParsing for P
where P: Parser<Input=Stream, Output=edn::ValueAndSpan>
impl<'a, P, M> OfExactlyParsing for P
where P: Parser<Input=Stream<'a>, Output=M>,
M: 'a + Streaming<'a>,
fn of_exactly<N, O>(self, n: N) -> OfExactly<P, N>
where N: Parser<Input = Self::Input, Output=O>
@ -247,10 +299,10 @@ impl<P> OfExactlyParsing for P
/// Equivalent to `combine::IteratorStream`.
impl StreamOnce for Stream
impl<'a> StreamOnce for Stream<'a>
type Item = edn::ValueAndSpan;
type Range = edn::ValueAndSpan;
type Item = &'a edn::ValueAndSpan;
type Range = &'a edn::ValueAndSpan;
type Position = SpanPosition;
@ -272,84 +324,132 @@ impl StreamOnce for Stream
/// Shorthands, just enough to convert the `mentat_db` crate for now. Written using `Box` for now:
/// it's simple and we can address allocation issues if and when they surface.
pub fn vector() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_vector()).boxed()
pub fn list() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_list()).boxed()
pub fn map() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_map()).boxed()
pub fn seq() -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
satisfy(|v: edn::ValueAndSpan| v.inner.is_list() || v.inner.is_vector()).boxed()
pub fn integer() -> Box<Parser<Input=Stream, Output=i64>> {
satisfy_map(|v: edn::ValueAndSpan| v.inner.as_integer()).boxed()
pub fn namespaced_keyword() -> Box<Parser<Input=Stream, Output=edn::NamespacedKeyword>> {
satisfy_map(|v: edn::ValueAndSpan| v.inner.as_namespaced_keyword().cloned()).boxed()
/// Like `combine::token()`, but compare an `edn::Value` to an `edn::ValueAndSpan`.
pub fn value(value: edn::Value) -> Box<Parser<Input=Stream, Output=edn::ValueAndSpan>> {
// TODO: make this comparison faster. Right now, we drop all the spans; if we walked the value
// trees together, we could avoid creating garbage.
satisfy(move |v: edn::ValueAndSpan| value == v.inner.into()).boxed()
fn keyword_map_(input: Stream) -> ParseResult<edn::ValueAndSpan, Stream>
// One run is a keyword followed by one or more non-keywords.
let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()),
many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword()))
.map(|vs: Vec<edn::ValueAndSpan>| {
// TODO: extract "spanning".
let beg = vs.first().unwrap().span.0;
let end = vs.last().unwrap().span.1;
edn::ValueAndSpan {
inner: edn::SpannedValue::Vector(vs),
span: edn::Span(beg, end),
let mut runs = vector().of_exactly(many::<Vec<_>, _>(run));
let (data, input) = try!(runs.parse_lazy(input).into());
let mut m: std::collections::BTreeMap<edn::ValueAndSpan, edn::ValueAndSpan> = std::collections::BTreeMap::default();
for (k, vs) in data {
if m.insert(k, vs).is_some() {
// TODO: improve this message.
return Err(Consumed::Empty(ParseError::from_errors(input.into_inner().position(), Vec::new())))
pub fn vector_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| {
if v.inner.is_vector() {
} else {
let map = edn::ValueAndSpan {
inner: edn::SpannedValue::Map(m),
span: edn::Span(0, 0), // TODO: fix this.
Ok((map, input))
/// Turn a vector of keywords and non-keyword values into a map. As an example, turn
/// ```edn
/// [:keyword1 value1 value2 ... :keyword2 value3 value4 ...]
/// ```
/// into
/// ```edn
/// {:keyword1 [value1 value2 ...] :keyword2 [value3 value4 ...]}
/// ```.
pub fn keyword_map() -> Expected<FnParser<Stream, fn(Stream) -> ParseResult<edn::ValueAndSpan, Stream>>>
// The `as` work arounds
parser(keyword_map_ as fn(Stream) -> ParseResult<edn::ValueAndSpan, Stream>).expected("keyword map")
pub fn vector<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
parser(vector_ as fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>).expected("vector")
pub fn list_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| {
if v.inner.is_list() {
} else {
pub fn list<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
parser(list_ as fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>).expected("list")
pub fn seq_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| {
if v.inner.is_list() || v.inner.is_vector() {
} else {
pub fn seq<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
parser(seq_ as fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>).expected("vector|list")
pub fn map_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| {
if v.inner.is_map() {
} else {
pub fn map<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
parser(map_ as fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>).expected("map")
/// A `[k v]` pair in the map form of a keyword map must have the shape `[:k, [v1, v2, ...]]`, with
/// none of `v1`, `v2`, ... a keyword: without loss of generality, we cannot represent the case
/// where `vn` is a keyword `:l`, since `[:k v1 v2 ... :l]` isn't a valid keyword map in vector
/// form. This function tests that a `[k v]` pair obeys these constraints.
/// If we didn't test this, then we might flatten a map entry `[:k [:l]]` to `[:k :l]`, which isn't
/// a valid keyword map in vector form.
pub fn is_valid_keyword_map_k_v<'a>((k, v): (&'a edn::ValueAndSpan, &'a edn::ValueAndSpan)) -> bool {
if !k.inner.is_keyword() {
return false;
match v.inner.as_vector() {
None => {
return false;
Some(ref vs) => {
if !vs.iter().all(|vv| !vv.inner.is_keyword()) {
return false;
return true;
pub fn keyword_map_<'a>(input: Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| {
v.inner.as_map().and_then(|map| {
if map.iter().all(is_valid_keyword_map_k_v) {
println!("yes {:?}", map);
} else {
println!("no {:?}", map);
pub fn keyword_map<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>>> {
parser(keyword_map_ as fn(Stream<'a>) -> ParseResult<Stream<'a>, Stream<'a>>).expected("keyword map")
pub fn integer_<'a>(input: Stream<'a>) -> ParseResult<i64, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| v.inner.as_integer())
pub fn integer<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<i64, Stream<'a>>>> {
parser(integer_ as fn(Stream<'a>) -> ParseResult<i64, Stream<'a>>).expected("integer")
pub fn namespaced_keyword_<'a>(input: Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>> {
satisfy_map(|v: &'a edn::ValueAndSpan| v.inner.as_namespaced_keyword())
pub fn namespaced_keyword<'a>() -> Expected<FnParser<Stream<'a>, fn(Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>>>> {
parser(namespaced_keyword_ as fn(Stream<'a>) -> ParseResult<&'a edn::NamespacedKeyword, Stream<'a>>).expected("namespaced_keyword")
/// Generate a `satisfy` expression that matches a `PlainSymbol` value with the given name.
@ -359,8 +459,8 @@ pub fn keyword_map() -> Expected<FnParser<Stream, fn(Stream) -> ParseResult<edn:
macro_rules! def_matches_plain_symbol {
( $parser: ident, $name: ident, $input: expr ) => {
def_parser!($parser, $name, edn::ValueAndSpan, {
satisfy(|v: edn::ValueAndSpan| {
def_parser!($parser, $name, &'a edn::ValueAndSpan, {
satisfy(|v: &'a edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::PlainSymbol(ref s) => s.0.as_str() == $input,
_ => false,
@ -376,8 +476,8 @@ macro_rules! def_matches_plain_symbol {
macro_rules! def_matches_keyword {
( $parser: ident, $name: ident, $input: expr ) => {
def_parser!($parser, $name, edn::ValueAndSpan, {
satisfy(|v: edn::ValueAndSpan| {
def_parser!($parser, $name, &'a edn::ValueAndSpan, {
satisfy(|v: &'a edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::Keyword(ref s) => s.0.as_str() == $input,
_ => false,
@ -394,8 +494,8 @@ macro_rules! def_matches_keyword {
macro_rules! def_matches_namespaced_keyword {
( $parser: ident, $name: ident, $input_namespace: expr, $input_name: expr ) => {
def_parser!($parser, $name, edn::ValueAndSpan, {
satisfy(|v: edn::ValueAndSpan| {
def_parser!($parser, $name, &'a edn::ValueAndSpan, {
satisfy(|v: &'a edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::NamespacedKeyword(ref s) => s.namespace.as_str() == $input_namespace && == $input_name,
_ => false,
@ -405,57 +505,248 @@ macro_rules! def_matches_namespaced_keyword {
use combine::primitives::{
use combine::primitives::FastResult::*;
/// Compare to `tuple_parser!` in `combine`.
/// This uses edge cases in Rust's hygienic macro system to represent arbitrary values. That is,
/// `$value: ident` represents both a type in the tuple parameterizing `KeywordMapParser` (since
/// `(A, B, C)` is a valid type declaration) and also a variable value extracted from the underlying
/// instance value. `$tmp: ident` represents an optional value to return.
/// This unrolls the cases. Each loop iteration reads a token. It then unrolls the known cases,
/// checking if any case matches the keyword string. If yes, we parse further. If no, we move on
/// to the next case. If no case matches, we fail.
macro_rules! keyword_map_parser {
($(($keyword:ident, $value:ident, $tmp:ident)),+) => {
impl <'a, $($value:),+> Parser for KeywordMapParser<($((&'static str, $value)),+)>
where $($value: Parser<Input=Stream<'a>>),+
type Input = Stream<'a>;
type Output = ($(Option<$value::Output>),+);
fn parse_lazy(&mut self,
mut input: Stream<'a>)
-> ConsumedResult<($(Option<$value::Output>),+), Stream<'a>> {
let ($((ref $keyword, ref mut $value)),+) = (*self).0;
let mut consumed = false;
let mut $tmp = None;
loop {
match input.uncons() {
Ok(value) => {
if let Some(ref keyword) = value.inner.as_keyword() {
if keyword.0.as_str() == *$keyword {
if $tmp.is_some() {
// Repeated match -- bail out! Providing good error
// messages is hard; this will do for now.
return ConsumedErr(ParseError::new(input.position(), Error::Unexpected(Info::Token(value))));
consumed = true;
$tmp = match $value.parse_lazy(input.clone()) {
ConsumedOk((x, new_input)) => {
input = new_input;
EmptyErr(mut err) => {
if let Ok(t) = input.uncons() {
if consumed {
return ConsumedErr(err)
} else {
return EmptyErr(err)
ConsumedErr(err) => return ConsumedErr(err),
EmptyOk((x, new_input)) => {
input = new_input;
// No keyword matched! Bail out.
return ConsumedErr(ParseError::new(input.position(), Error::Unexpected(Info::Token(value))));
Err(err) => {
if consumed {
return ConsumedOk((($($tmp),+), input))
} else {
if err == Error::end_of_input() {
return EmptyOk((($($tmp),+), input));
return EmptyErr(ParseError::new(input.position(), err))
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et), (Fk, Fv, Ft));
keyword_map_parser!((Ak, Av, At), (Bk, Bv, Bt), (Ck, Cv, Ct), (Dk, Dv, Dt), (Ek, Ev, Et), (Fk, Fv, Ft), (Gk, Gv, Gt));
mod tests {
use combine::{eof};
use combine::{
use super::*;
/// Take a string `input` and a string `expected` and ensure that `input` parses to an
/// `edn::Value` keyword map equivalent to the `edn::Value` that `expected` parses to.
///
/// The parse result is compared via `result.ok()`, so `expected` is an `Option`:
/// passing `None` asserts that parsing `input` as a keyword map fails.
macro_rules! assert_keyword_map_eq {
( $input: expr, $expected: expr ) => {{
let input = edn::parse::value($input).expect("to be able to parse input EDN");
// NOTE(review): the next line looks truncated in this view (likely
// `$expected.map(|e| {`) -- confirm against the original file.
let expected = $|e| {
edn::parse::value(e).expect("to be able to parse expected EDN").without_spans()
// Strip spans so the comparison ignores source positions; `skip(eof())` additionally
// requires that the keyword map parser consume the entire input.
let mut par = keyword_map().map(|x| x.without_spans()).skip(eof());
// On success `parse` returns the value paired with the remaining input; keep only the value.
let result = par.parse(input.into_atom_stream()).map(|x| x.0);
assert_eq!(result.ok(), expected);
use macros::{
/// A little test parser.
///
/// The struct carries no data: the lifetime `'a` is held only through `PhantomData`.
pub struct Test<'a>(std::marker::PhantomData<&'a ()>);
// NOTE(review): presumably defines a matcher on `Test` named `add` for the namespaced
// keyword `:db/add` -- confirm against the macro definition.
def_matches_namespaced_keyword!(Test, add, "db", "add");
// `Test::entid` produces an `i64`. As an alternative branch, any namespaced keyword is
// accepted and mapped to the fixed value -1.
// NOTE(review): the first line of the parser body is not visible in this view.
def_parser!(Test, entid, i64, {
.map(|x| x)
.or(namespaced_keyword().map(|_| -1))
/// Building a keyword map parser that lists the same key twice (`"x"`) is expected to
/// panic; the `should_panic` attribute pins the panic message.
#[should_panic(expected = r#"keyword map has repeated key: "x""#)]
fn test_keyword_map_of() {
keyword_map_of!(("x", Test::entid()),
("x", Test::entid()));
/// Checks that a keyword map in vector form (via `child_iter`) and in map form (via
/// `keyword_map_iter`) iterate to the same flat element sequence, and that
/// `keyword_map().or(vector())` parses both forms to the same elements.
fn test_iter() {
// A vector and a map iterated as a keyword map produce the same elements.
let input = edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN");
assert_eq!(input.child_iter().cloned().map(|x| x.without_spans()).into_iter().collect::<Vec<_>>(),
edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
// Map form: each key is followed by the flattened contents of its value vector, so
// `{:x [1 2] :y [3 4]}` is expected to iterate as `:y 3 4 :x 1 2`.
let input = edn::parse::value("{:x [1 2] :y [3 4]}").expect("to be able to parse input as EDN");
assert_eq!(input.keyword_map_iter().cloned().map(|x| x.without_spans()).into_iter().collect::<Vec<_>>(),
edn::parse::value("[:y 3 4 :x 1 2]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
// Parsing a keyword map in map and vector form produces the same elements. The order (:y
// before :x) is a foible of our EDN implementation and could be easily changed.
// NOTE(review): the closure `|x||x| ...` in the two assertions below looks garbled in
// this view -- confirm against the original source.
assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x||x| x.clone().without_spans()).into_iter().collect::<Vec<_>>()),
"{:x [1] :y [2]}",
edn::parse::value("[:y 2 :x 1]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x||x| x.clone().without_spans()).into_iter().collect::<Vec<_>>()),
"[:y 2 :x 1]",
edn::parse::value("[:y 2 :x 1]").expect("to be able to parse input as EDN").without_spans().into_vector().expect("an EDN vector"));
/// Exercises `keyword_map_of!` with keys `"x"` and `"y"`: the result is a tuple of
/// `Option`s in declaration order (x, y), independent of the order of keys in the input.
///
/// NOTE(review): the bare string-literal lines and the "... aren't allowed" comments
/// interleaved below appear to be remnants of an earlier `assert_keyword_map_eq!`-based
/// version of this test -- confirm against the original file.
fn test_keyword_map() {
"[:foo 1 2 3 :bar 4]",
Some("{:foo [1 2 3] :bar [4]}"));
// Keys given in the opposite order still land in their declared slots.
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:y 2 :x 1]",
(Some(1), Some(2)));
// Trailing keywords aren't allowed.
"[:foo 2 :bar]",
// Keys given in declaration order.
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:x 1 :y 2]",
(Some(1), Some(2)));
// Duplicate keywords aren't allowed.
"[:foo 2 :foo 1]",
// A key that is absent from the input yields `None` in its slot.
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:x 1]",
(Some(1), None));
// Starting with anything but a keyword isn't allowed.
"[2 :foo 1]",
// Values may be nested vectors parsed by their own sub-parser; an empty vector gives `Some(vec![])`.
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))),
("y", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))))),
"[:x [] :y [1 2]]",
(Some(vec![]), Some(vec![1, 2])));
// Consecutive keywords aren't allowed.
"[:foo :bar 1]",
// Empty lists return an empty map.
// NOTE(review): the input literal for this assertion is not visible in this view; the
// expected result is `(None, None)`.
assert_edn_parses_to!(|| vector().of_exactly(keyword_map_of!(("x", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))),
("y", vector().of_exactly(many::<Vec<_>, _>(Test::entid()))))),
(None, None));
/// A key repeated in the input (`:x` appears twice) must fail to parse, and the error
/// text must report an `:x` keyword token as unexpected.
fn test_keyword_map_failures() {
assert_parse_failure_contains!(|| vector().of_exactly(keyword_map_of!(("x", Test::entid()), ("y", Test::entid()))),
"[:x 1 :x 2]",
r#"errors: [Unexpected(Token(ValueAndSpan { inner: Keyword(Keyword("x"))"#);
// assert_edn_parses_to!(|| keyword_map().or(vector()).map(|x||x| x.clone().without_spans()).into_iter().collect::<Vec<_>>()), "{:x [1] :y [2]}", vec![]);
// assert_edn_parses_to!(|| keyword_map().or(vector()).of_exactly((Test::entid(), Test::entid())), "{:x [1] :y [2]}", (-1, 1));
// assert_edn_parses_to!(|| kw_map().of_exactly((Test::entid(), Test::entid())), "[:a 0 :b 0 1]", (1, 1));
// assert_edn_parses_to!(|| keyword_map_of(&[(":kw1", Test::entid()),
// (":kw2", (Test::entid(), Test::entid())),]),
// "{:kw1 0 :kw2 1 :x/y}", ((Some(0), Some((0, 1)))));
// let input = edn::parse::value("[:x/y]").expect("to be able to parse input as EDN");
// let par = vector().of_exactly(Test::entid());
// let stream: Stream = (&input).atom_stream();
// let result = par.skip(eof()).parse(stream).map(|x| x.0);
// assert_eq!(result, Ok(1));
// }
// #[test]
// fn test_keyword_map() {
// assert_keyword_map_eq!(
// "[:foo 1 2 3 :bar 4]",
// Some("{:foo [1 2 3] :bar [4]}"));
// // Trailing keywords aren't allowed.
// assert_keyword_map_eq!(
// "[:foo]",
// None);
// assert_keyword_map_eq!(
// "[:foo 2 :bar]",
// None);
// // Duplicate keywords aren't allowed.
// assert_keyword_map_eq!(
// "[:foo 2 :foo 1]",
// None);
// // Starting with anything but a keyword isn't allowed.
// assert_keyword_map_eq!(
// "[2 :foo 1]",
// None);
// // Consecutive keywords aren't allowed.
// assert_keyword_map_eq!(
// "[:foo :bar 1]",
// None);
// // Empty lists return an empty map.
// assert_keyword_map_eq!(
// "[]",
// Some("{}"));
// }

View file

@ -4,7 +4,7 @@ version = "0.0.1"
workspace = ".."
combine = "2.2.2"
combine = "2.3.2"
error-chain = "0.8.1"
matches = "0.1"

View file

@ -21,6 +21,7 @@ use self::combine::{eof, many, many1, optional, parser, satisfy, satisfy_map, Pa
use self::combine::combinator::{any, choice, or, try};
use self::mentat_parser_utils::{
@ -79,12 +80,12 @@ error_chain! {
display("not a variable: '{}'", value)
FindParseError(e: combine::ParseError<ValueStream>) {
FindParseError(e: ValueParseError) {
description(":find parse error")
display(":find parse error")
WhereParseError(e: combine::ParseError<ValueStream>) {
WhereParseError(e: ValueParseError) {
description(":where parse error")
display(":where parse error")
@ -117,7 +118,7 @@ error_chain! {
pub struct Query;
pub struct Query<'a>(std::marker::PhantomData<&'a ()>);
def_parser!(Query, variable, Variable, {
@ -141,7 +142,7 @@ def_parser!(Query, arguments, Vec<FnArg>, {
def_parser!(Query, direction, Direction, {
satisfy_map(|v: edn::ValueAndSpan| {
satisfy_map(|v: &edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::PlainSymbol(ref s) => {
let name = s.0.as_str();
@ -162,20 +163,20 @@ def_parser!(Query, order, Order, {
.or(Query::variable().map(|v| Order(Direction::Ascending, v)))
pub struct Where;
pub struct Where<'a>(std::marker::PhantomData<&'a ()>);
def_parser!(Where, pattern_value_place, PatternValuePlace, {
def_parser!(Query, natural_number, u64, {
any().and_then(|v: edn::ValueAndSpan| {
any().and_then(|v: &edn::ValueAndSpan| {
match v.inner {
edn::SpannedValue::Integer(x) if (x > 0) => {
Ok(x as u64)
spanned => {
let e = Box::new(Error::from_kind(ErrorKind::InvalidLimit(spanned.into())));
ref spanned => {
let e = Box::new(Error::from_kind(ErrorKind::InvalidLimit(spanned.clone().into())));
@ -338,7 +339,7 @@ def_parser!(Where, clauses, Vec<WhereClause>, {