mentat/tx-parser/src/lib.rs
Nick Alexander 5369f03464 Improve parsing of nested edn::ValueAndSpan streams. r=rnewman (#393)
* Pre: Expose more in edn.

* Pre: Make it easier to work with ValueAndSpan.

with_spans() is a temporary hack, needed only because I don't care to
parse the bootstrap assertions from text right now.

* Part 1a: Add `value_and_span` for parsing nested `edn::ValueAndSpan` instances.

I wasn't able to abstract over `edn::Value` and `edn::ValueAndSpan`;
there are multiple obstacles.  I chose to roll with
`edn::ValueAndSpan` since it exposes the additional span information
that we will want to form good error messages in the future.

* Part 1b: Add keyword_map() parsing an `edn::Value::Vector` into an `edn::Value::Map`.

* Part 1c: Add `Log`/`.log(...)` for logging parser progress.

This is a terrible hack, but it sure helps to debug complicated nested
parsers.  I don't even know what a principled approach would look
like; since our parser combinators are so frequently expressed in
code, it's hard to imagine a data-driven interpreter that can help
debug things.

* Part 2: Use `value_and_span` apparatus in tx-parser/.

I break an abstraction boundary by returning a value column
`edn::ValueAndSpan` rather than just an `edn::Value`.  That is, the
transaction processor shouldn't care where the `edn::Value` it is
processing arose -- even if we care to track that information, we should
bake it into the `Entity` type.  We do this because we need to
dynamically parse the value column to support nested maps, and parsing
requires a full `edn::ValueAndSpan`.  Alternately, we could cheat and
fake the spans when parsing nested maps, but that's potentially
expensive.

* Part 3: Use `value_and_span` apparatus in query-parser/.

* Part 4: Use `value_and_span` apparatus in root crate.

* Review comment: Make Span and SpanPosition Copy.

* Review comment: nits.

* Review comment: Make `or` be `or_exactly`.

I baked the eof checking directly into the parser, rather than using
the skip and eof parsers.  I also took the time to restore some tests
that were mistakenly commented out.

* Review comment: Extract and use def_matches_* macros.

* Review comment: .map() as late as possible.
2017-04-06 10:06:28 -07:00

305 lines
12 KiB
Rust

// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#![allow(dead_code)]
extern crate combine;
#[macro_use]
extern crate error_chain;
extern crate edn;
extern crate mentat_tx;
#[macro_use]
extern crate mentat_parser_utils;
use combine::{
eof,
many,
parser,
satisfy,
satisfy_map,
Parser,
ParseResult,
};
use mentat_tx::entities::{
AtomOrLookupRefOrVectorOrMapNotation,
Entid,
EntidOrLookupRefOrTempId,
Entity,
LookupRef,
MapNotation,
OpType,
TempId,
};
use mentat_parser_utils::{ResultParser};
use mentat_parser_utils::value_and_span::{
Item,
OfExactlyParsing,
integer,
list,
map,
namespaced_keyword,
vector,
};
pub mod errors;
pub use errors::*;
/// Namespace type for the transaction parsers.
///
/// Every parser in this file is reached as an associated function on `Tx` (for example
/// `Tx::entid()` or `Tx::entity()`); `Tx::parse` is the public entry point.
pub struct Tx;
// Parser for an entid: an integer becomes `Entid::Entid`, a namespaced keyword becomes
// `Entid::Ident`.  The integer alternative is tried first.
def_parser!(Tx, entid, Entid, {
    let numeric = integer().map(Entid::Entid);
    let ident = namespaced_keyword().map(Entid::Ident);
    numeric.or(ident)
});
// Defines `Tx::literal_lookup_ref()`, which `Tx::lookup_ref` uses to recognize the leading
// `lookup-ref` plain symbol of a lookup ref form.
def_matches_plain_symbol!(Tx, literal_lookup_ref, "lookup-ref");
// Parser for a lookup ref: a list holding the `lookup-ref` symbol followed by an entid (the
// attribute) and an atom (the value).  The value is stored with its span information removed.
def_parser!(Tx, lookup_ref, LookupRef, {
    let attribute_and_value = (Tx::entid(), Tx::atom());
    let contents = Tx::literal_lookup_ref()
        .with(attribute_and_value)
        .map(|(attribute, value)| {
            LookupRef {
                a: attribute,
                v: value.without_spans(),
            }
        });
    list().of_exactly(contents)
});
// Parser for anything allowed in the entity position: an entid, a lookup ref, or a temp id.
// The alternatives are tried in that order.
def_parser!(Tx, entid_or_lookup_ref_or_temp_id, EntidOrLookupRefOrTempId, {
    let as_entid = Tx::entid().map(|e| EntidOrLookupRefOrTempId::Entid(e));
    let as_lookup_ref = Tx::lookup_ref().map(|r| EntidOrLookupRefOrTempId::LookupRef(r));
    let as_temp_id = Tx::temp_id().map(|t| EntidOrLookupRefOrTempId::TempId(t));
    as_entid.or(as_lookup_ref).or(as_temp_id)
});
// Parser for a temp id: accepts a value that `into_text()` converts to text and wraps that
// text as `TempId::External`.
def_parser!(Tx, temp_id, TempId, {
    satisfy_map(|candidate: edn::ValueAndSpan| {
        candidate.into_text().map(|text| TempId::External(text))
    })
});
// Parser for a single atom: accepts exactly those values for which `into_atom()` returns
// `Some`, yielding the value together with its span.
def_parser!(Tx, atom, edn::ValueAndSpan, {
    let keep_if_atom = |candidate: edn::ValueAndSpan| candidate.into_atom();
    satisfy_map(keep_if_atom)
});
// Parser for a vector in the value position: a vector whose elements are each parsed by
// `Tx::atom_or_lookup_ref_or_vector`.
def_parser!(Tx, nested_vector, Vec<AtomOrLookupRefOrVectorOrMapNotation>, {
    let elements = many(Tx::atom_or_lookup_ref_or_vector());
    vector().of_exactly(elements)
});
// Parser for anything allowed in the value position.  The alternatives are tried in this
// order: lookup ref, nested vector, map notation, and finally a plain atom.
def_parser!(Tx, atom_or_lookup_ref_or_vector, AtomOrLookupRefOrVectorOrMapNotation, {
    let as_lookup_ref = Tx::lookup_ref().map(|r| AtomOrLookupRefOrVectorOrMapNotation::LookupRef(r));
    let as_vector = Tx::nested_vector().map(|vs| AtomOrLookupRefOrVectorOrMapNotation::Vector(vs));
    let as_map = Tx::map_notation().map(|m| AtomOrLookupRefOrVectorOrMapNotation::MapNotation(m));
    let as_atom = Tx::atom().map(|a| AtomOrLookupRefOrVectorOrMapNotation::Atom(a));
    as_lookup_ref
        .or(as_vector)
        .or(as_map)
        .or(as_atom)
});
// Define `Tx::literal_db_add()` and `Tx::literal_db_retract()`, which `Tx::add_or_retract`
// uses to recognize the `:db/add` and `:db/retract` operation keywords.
def_matches_namespaced_keyword!(Tx, literal_db_add, "db", "add");
def_matches_namespaced_keyword!(Tx, literal_db_retract, "db", "retract");
// Parser for the vector form of an entity: `[op e a v]`, where `op` is `:db/add` or
// `:db/retract`, `e` is an entid, lookup ref or temp id, `a` is an entid, and `v` is any
// value-position form.
def_parser!(Tx, add_or_retract, Entity, {
    let add = Tx::literal_db_add().map(|_| OpType::Add);
    let retract = Tx::literal_db_retract().map(|_| OpType::Retract);
    let columns = (add.or(retract),
                   Tx::entid_or_lookup_ref_or_temp_id(),
                   Tx::entid(),
                   Tx::atom_or_lookup_ref_or_vector());
    let assertion = columns.map(|(operation, entity, attribute, value)| {
        Entity::AddOrRetract {
            op: operation,
            e: entity,
            a: attribute,
            v: value,
        }
    });
    vector().of_exactly(assertion)
});
// Parser for the map form of an entity: a map whose contents are parsed as a sequence of
// (attribute entid, value) pairs, gathered into a `MapNotation`.
def_parser!(Tx, map_notation, MapNotation, {
    let attribute_value = (Tx::entid(), Tx::atom_or_lookup_ref_or_vector());
    map()
        .of_exactly(many(attribute_value))
        .map(|pairs: Vec<(Entid, AtomOrLookupRefOrVectorOrMapNotation)>| -> MapNotation {
            // Insert in parse order, so a later duplicate attribute replaces an earlier one.
            let mut notation = MapNotation::new();
            notation.extend(pairs);
            notation
        })
});
// Parser for one transaction entity: the `[op e a v]` vector form is tried first, then the
// map notation form.
def_parser!(Tx, entity, Entity, {
    let vector_form = Tx::add_or_retract();
    let map_form = Tx::map_notation().map(|m| Entity::MapNotation(m));
    vector_form.or(map_form)
});
// Parser for a whole transaction: a vector whose elements are each parsed by `Tx::entity`.
def_parser!(Tx, entities, Vec<Entity>, {
    let each_entity = many(Tx::entity());
    vector().of_exactly(each_entity)
});
impl Tx {
pub fn parse(input: edn::ValueAndSpan) -> std::result::Result<Vec<Entity>, errors::Error> {
Tx::entities()
.skip(eof())
.parse(input.into_atom_stream())
.map(|x| x.0)
.map_err(|e| Error::from_kind(ErrorKind::ParseError(e)))
}
fn parse_entid_or_lookup_ref_or_temp_id(input: edn::ValueAndSpan) -> std::result::Result<EntidOrLookupRefOrTempId, errors::Error> {
Tx::entid_or_lookup_ref_or_temp_id()
.skip(eof())
.parse(input.into_atom_stream())
.map(|x| x.0)
.map_err(|e| Error::from_kind(ErrorKind::ParseError(e)))
}
}
/// Remove any :db/id value from the given map notation, converting the returned value into
/// something suitable for the entity position rather than something suitable for a value position.
///
/// This is here simply to not expose some of the internal APIs of the tx-parser.
pub fn remove_db_id(map: &mut MapNotation) -> std::result::Result<Option<EntidOrLookupRefOrTempId>, errors::Error> {
// TODO: extract lazy defined constant.
let db_id_key = Entid::Ident(edn::NamespacedKeyword::new("db", "id"));
let db_id: Option<EntidOrLookupRefOrTempId> = if let Some(id) = map.remove(&db_id_key) {
match id {
AtomOrLookupRefOrVectorOrMapNotation::Atom(v) => {
let db_id = Tx::parse_entid_or_lookup_ref_or_temp_id(v)
.chain_err(|| Error::from(ErrorKind::DbIdError))?;
Some(db_id)
},
AtomOrLookupRefOrVectorOrMapNotation::LookupRef(_) |
AtomOrLookupRefOrVectorOrMapNotation::Vector(_) |
AtomOrLookupRefOrVectorOrMapNotation::MapNotation(_) => {
bail!(ErrorKind::DbIdError)
},
}
} else {
None
};
Ok(db_id)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use combine::Parser;
    use edn::{
        NamespacedKeyword,
        PlainSymbol,
        Span,
        SpannedValue,
        Value,
        ValueAndSpan,
    };
    use mentat_tx::entities::{
        Entid,
        EntidOrLookupRefOrTempId,
        Entity,
        OpType,
        AtomOrLookupRefOrVectorOrMapNotation,
    };

    /// Build a namespaced keyword as a plain `edn::Value`.
    fn kw(namespace: &str, name: &str) -> Value {
        Value::NamespacedKeyword(NamespacedKeyword::new(namespace, name))
    }

    /// Build the `Entid::Ident` for the given namespaced keyword.
    fn ident(namespace: &str, name: &str) -> Entid {
        Entid::Ident(NamespacedKeyword::new(namespace, name))
    }

    /// Build the expected value-position atom for a text value located at `span`.
    fn text_atom(text: &str, span: Span) -> AtomOrLookupRefOrVectorOrMapNotation {
        AtomOrLookupRefOrVectorOrMapNotation::Atom(
            ValueAndSpan::new(SpannedValue::Text(text.to_string()), span))
    }

    /// The input form `(lookup-ref :test/a1 "v1")`.
    fn lookup_ref_form() -> Value {
        Value::List(vec![Value::PlainSymbol(PlainSymbol::new("lookup-ref")),
                         kw("test", "a1"),
                         Value::Text("v1".into())].into_iter().collect())
    }

    /// The parsed entity-position value expected for `lookup_ref_form()`.
    fn expected_lookup_ref() -> EntidOrLookupRefOrTempId {
        EntidOrLookupRefOrTempId::LookupRef(LookupRef {
            a: ident("test", "a1"),
            v: Value::Text("v1".into()),
        })
    }

    /// Run `Tx::entity()` over `input` (with synthesized spans) and check the parsed entity.
    fn assert_entity_parses_to(input: Value, expected: Entity) {
        let mut parser = Tx::entity();
        let result = parser.parse(input.with_spans().into_atom_stream()).map(|x| x.0);
        assert_eq!(result, Ok(expected));
    }

    #[test]
    fn test_add() {
        let input = Value::Vector(vec![kw("db", "add"),
                                       kw("test", "entid"),
                                       kw("test", "a"),
                                       Value::Text("v".into())]);
        assert_entity_parses_to(input, Entity::AddOrRetract {
            op: OpType::Add,
            e: EntidOrLookupRefOrTempId::Entid(ident("test", "entid")),
            a: ident("test", "a"),
            v: text_atom("v", Span(29, 32)),
        });
    }

    #[test]
    fn test_retract() {
        let input = Value::Vector(vec![kw("db", "retract"),
                                       Value::Integer(101),
                                       kw("test", "a"),
                                       Value::Text("v".into())]);
        assert_entity_parses_to(input, Entity::AddOrRetract {
            op: OpType::Retract,
            e: EntidOrLookupRefOrTempId::Entid(Entid::Entid(101)),
            a: ident("test", "a"),
            v: text_atom("v", Span(25, 28)),
        });
    }

    #[test]
    fn test_lookup_ref() {
        let input = Value::Vector(vec![kw("db", "add"),
                                       lookup_ref_form(),
                                       kw("test", "a"),
                                       Value::Text("v".into())]);
        assert_entity_parses_to(input, Entity::AddOrRetract {
            op: OpType::Add,
            e: expected_lookup_ref(),
            a: ident("test", "a"),
            v: text_atom("v", Span(44, 47)),
        });
    }

    #[test]
    fn test_nested_vector() {
        let input = Value::Vector(vec![kw("db", "add"),
                                       lookup_ref_form(),
                                       kw("test", "a"),
                                       Value::Vector(vec![Value::Text("v1".into()), Value::Text("v2".into())])]);
        assert_entity_parses_to(input, Entity::AddOrRetract {
            op: OpType::Add,
            e: expected_lookup_ref(),
            a: ident("test", "a"),
            v: AtomOrLookupRefOrVectorOrMapNotation::Vector(vec![text_atom("v1", Span(45, 49)),
                                                                 text_atom("v2", Span(50, 54))]),
        });
    }

    #[test]
    fn test_map_notation() {
        let expected: MapNotation = vec![
            (ident("db", "id"),
             text_atom("t", Span(8, 11))),
            (ident("db", "ident"),
             AtomOrLookupRefOrVectorOrMapNotation::Atom(
                 ValueAndSpan::new(SpannedValue::NamespacedKeyword(NamespacedKeyword::new("test", "attribute")),
                                   Span(22, 37)))),
        ].into_iter().collect();

        let entries: BTreeMap<Value, Value> = vec![
            (kw("db", "id"), Value::Text("t".to_string())),
            (kw("db", "ident"), kw("test", "attribute")),
        ].into_iter().collect();

        assert_entity_parses_to(Value::Map(entries), Entity::MapNotation(expected));
    }
}