Pull improvements (#682) r=nalexander

* Parse and handle aliased pull attributes.
* Allow :db/id to be mentioned as a pull attribute.
* Clean up comment.
* Remove unused function.
This commit is contained in:
Richard Newman 2018-05-13 14:15:36 -07:00 committed by GitHub
parent 4fde4fe0a6
commit 60cb5d2432
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 170 additions and 47 deletions

View file

@ -315,6 +315,12 @@ impl From<StructuredMap> for Binding {
}
}
impl From<Vec<Binding>> for Binding {
fn from(value: Vec<Binding>) -> Self {
Binding::Vec(ValueRc::new(value))
}
}
impl Binding {
pub fn val(self) -> Option<TypedValue> {
match self {

View file

@ -67,6 +67,7 @@ use self::mentat_query::{
Order,
OrJoin,
OrWhereClause,
NamedPullAttribute,
NotJoin,
Pattern,
PatternNonValuePlace,
@ -191,6 +192,7 @@ def_parser!(Query, order, Order, {
def_matches_plain_symbol!(Query, the, "the");
def_matches_plain_symbol!(Query, pull, "pull");
def_matches_plain_symbol!(Query, wildcard, "*");
def_matches_keyword!(Query, alias_as, "as");
pub struct Where<'a>(std::marker::PhantomData<&'a ()>);
@ -303,11 +305,19 @@ def_parser!(Query, aggregate, Aggregate, {
})
});
def_parser!(Query, pull_concrete_attribute_ident, PullConcreteAttribute, {
forward_keyword().map(|k| PullConcreteAttribute::Ident(::std::rc::Rc::new(k.clone())))
});
def_parser!(Query, pull_concrete_attribute, PullAttributeSpec, {
forward_keyword().map(|k|
(Query::pull_concrete_attribute_ident(),
optional(try(Query::alias_as().with(forward_keyword().map(|alias| ::std::rc::Rc::new(alias.clone()))))))
.map(|(attribute, alias)|
PullAttributeSpec::Attribute(
PullConcreteAttribute::Ident(
::std::rc::Rc::new(k.clone()))))
NamedPullAttribute {
attribute,
alias: alias,
}))
});
def_parser!(Query, pull_wildcard_attribute, PullAttributeSpec, {
@ -1205,23 +1215,27 @@ mod test {
let foo_bar = ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "bar"));
let foo_baz = ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "baz"));
let foo_horse = ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "horse"));
assert_edn_parses_to!(Query::pull_concrete_attribute,
":foo/bar",
PullAttributeSpec::Attribute(
PullConcreteAttribute::Ident(foo_bar.clone())));
PullConcreteAttribute::Ident(foo_bar.clone()).into()));
assert_edn_parses_to!(Query::pull_attribute,
":foo/bar",
PullAttributeSpec::Attribute(
PullConcreteAttribute::Ident(foo_bar.clone())));
PullConcreteAttribute::Ident(foo_bar.clone()).into()));
assert_edn_parses_to!(Find::elem,
"(pull ?v [:foo/bar :foo/baz])",
"(pull ?v [:foo/bar :as :foo/horse, :foo/baz])",
Element::Pull(Pull {
var: Variable::from_valid_name("?v"),
patterns: vec![
PullAttributeSpec::Attribute(
PullConcreteAttribute::Ident(foo_bar.clone())),
NamedPullAttribute {
attribute: PullConcreteAttribute::Ident(foo_bar.clone()),
alias: Some(foo_horse),
}),
PullAttributeSpec::Attribute(
PullConcreteAttribute::Ident(foo_baz.clone())),
PullConcreteAttribute::Ident(foo_baz.clone()).into()),
],
}));
assert_parse_failure_contains!(Find::elem,
@ -1242,7 +1256,7 @@ mod test {
PullAttributeSpec::Attribute(
PullConcreteAttribute::Ident(
::std::rc::Rc::new(edn::Keyword::namespaced("foo", "bar"))
)
).into()
),
] })]),
where_clauses: vec![

View file

@ -22,6 +22,11 @@ error_chain! {
description("unnamed attribute")
display("attribute {:?} has no name", id)
}
RepeatedDbId {
description(":db/id repeated")
display(":db/id repeated")
}
}
links {

View file

@ -79,18 +79,21 @@ use std::iter::{
};
use mentat_core::{
Binding,
Cloned,
Entid,
HasSchema,
Keyword,
Schema,
StructuredMap,
TypedValue,
ValueRc,
};
use mentat_db::cache;
use mentat_query::{
NamedPullAttribute,
PullAttributeSpec,
PullConcreteAttribute,
};
@ -110,7 +113,7 @@ pub fn pull_attributes_for_entity<A>(schema: &Schema,
attributes: A) -> Result<StructuredMap>
where A: IntoIterator<Item=Entid> {
let attrs = attributes.into_iter()
.map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e)))
.map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e).into()))
.collect();
Puller::prepare(schema, attrs)?
.pull(schema, db, once(entity))
@ -130,7 +133,7 @@ pub fn pull_attributes_for_entities<E, A>(schema: &Schema,
where E: IntoIterator<Item=Entid>,
A: IntoIterator<Item=Entid> {
let attrs = attributes.into_iter()
.map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e)))
.map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e).into()))
.collect();
Puller::prepare(schema, attrs)?
.pull(schema, db, entities)
@ -142,16 +145,16 @@ pub struct Puller {
// The range is the set of aliases to use in the output.
attributes: BTreeMap<Entid, ValueRc<Keyword>>,
attribute_spec: cache::AttributeSpec,
// If this is set, each pulled entity is contributed to its own output map, labeled with this
// keyword. This is a divergence from Datomic, which has no types by which to differentiate a
// long from an entity ID, and thus represents all entities in pull as, _e.g._, `{:db/id 1234}`.
// Mentat can use `TypedValue::Ref(1234)`, but it's sometimes convenient to fetch the entity ID
// itself as part of a pull expression: `{:person 1234, :person/name "Peter"}`.
db_id_alias: Option<ValueRc<Keyword>>,
}
impl Puller {
pub fn prepare_simple_attributes(schema: &Schema, attributes: Vec<Entid>) -> Result<Puller> {
Puller::prepare(schema,
attributes.into_iter()
.map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e)))
.collect())
}
pub fn prepare(schema: &Schema, attributes: Vec<PullAttributeSpec>) -> Result<Puller> {
// TODO: eventually this entry point will handle aliasing and that kind of
// thing. For now it's just a convenience.
@ -165,6 +168,9 @@ impl Puller {
let mut names: BTreeMap<Entid, ValueRc<Keyword>> = Default::default();
let mut attrs: BTreeSet<Entid> = Default::default();
let db_id = ::std::rc::Rc::new(Keyword::namespaced("db", "id"));
let mut db_id_alias = None;
for attr in attributes.iter() {
match attr {
&PullAttributeSpec::Wildcard => {
@ -175,22 +181,42 @@ impl Puller {
}
break;
},
&PullAttributeSpec::Attribute(PullConcreteAttribute::Ident(ref i)) => {
if let Some(entid) = schema.get_entid(i) {
names.insert(entid.into(), i.to_value_rc());
attrs.insert(entid.into());
&PullAttributeSpec::Attribute(NamedPullAttribute {
ref attribute,
ref alias,
}) => {
let alias = alias.as_ref()
.map(|ref r| r.to_value_rc());
match attribute {
// Handle :db/id.
&PullConcreteAttribute::Ident(ref i) if i.as_ref() == db_id.as_ref() => {
// We only allow :db/id once.
if db_id_alias.is_some() {
bail!(ErrorKind::RepeatedDbId);
}
db_id_alias = Some(alias.unwrap_or_else(|| db_id.to_value_rc()));
},
&PullConcreteAttribute::Ident(ref i) => {
if let Some(entid) = schema.get_entid(i) {
let name = alias.unwrap_or_else(|| i.to_value_rc());
names.insert(entid.into(), name);
attrs.insert(entid.into());
}
},
&PullConcreteAttribute::Entid(ref entid) => {
let name = alias.map(Ok).unwrap_or_else(|| lookup_name(entid))?;
names.insert(*entid, name);
attrs.insert(*entid);
},
}
},
&PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(ref entid)) => {
names.insert(*entid, lookup_name(entid)?);
attrs.insert(*entid);
},
}
}
Ok(Puller {
attributes: names,
attribute_spec: cache::AttributeSpec::specified(&attrs, schema),
db_id_alias,
})
}
@ -205,9 +231,7 @@ impl Puller {
// - Recursing. (TODO: we'll need AttributeCaches to not overwrite in case of recursion! And
// ideally not do excess work when some entity/attribute pairs are known.)
// - Building a structure by walking the pull expression with the caches.
// TODO: aliases.
// TODO: limits.
// TODO: fts.
// Build a cache for these attributes and entities.
// TODO: use the store's existing cache!
@ -222,6 +246,17 @@ impl Puller {
// TODO: should we walk `e` then `a`, or `a` then `e`? Possibly the right answer
// is just to collect differently!
let mut maps = BTreeMap::new();
// Collect :db/id if requested.
if let Some(ref alias) = self.db_id_alias {
for e in entities.iter() {
let mut r = maps.entry(*e)
.or_insert(ValueRc::new(StructuredMap::default()));
let mut m = ValueRc::get_mut(r).unwrap();
m.insert(alias.clone(), Binding::Scalar(TypedValue::Ref(*e)));
}
}
for (name, cache) in self.attributes.iter().filter_map(|(a, name)|
caches.forward_attribute_cache_for_attribute(schema, *a)
.map(|cache| (name.clone(), cache))) {

View file

@ -498,14 +498,28 @@ pub enum PullConcreteAttribute {
Entid(i64),
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NamedPullAttribute {
pub attribute: PullConcreteAttribute,
pub alias: Option<Rc<Keyword>>,
}
impl From<PullConcreteAttribute> for NamedPullAttribute {
fn from(a: PullConcreteAttribute) -> Self {
NamedPullAttribute {
attribute: a,
alias: None,
}
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PullAttributeSpec {
Wildcard,
Attribute(PullConcreteAttribute),
Attribute(NamedPullAttribute),
// PullMapSpec(Vec<…>),
// AttributeWithOpts(PullConcreteAttribute, …),
// LimitedAttribute(PullConcreteAttribute, u64), // Limit nil => Attribute instead.
// DefaultedAttribute(PullConcreteAttribute, PullDefaultValue),
// LimitedAttribute(NamedPullAttribute, u64), // Limit nil => Attribute instead.
// DefaultedAttribute(NamedPullAttribute, PullDefaultValue),
}
impl std::fmt::Display for PullConcreteAttribute {
@ -521,14 +535,25 @@ impl std::fmt::Display for PullConcreteAttribute {
}
}
impl std::fmt::Display for NamedPullAttribute {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let &Some(ref alias) = &self.alias {
write!(f, "{} :as {}", self.attribute, alias)
} else {
write!(f, "{}", self.attribute)
}
}
}
impl std::fmt::Display for PullAttributeSpec {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
&PullAttributeSpec::Wildcard => {
write!(f, "*")
},
&PullAttributeSpec::Attribute(ref a) => {
write!(f, "{}", a)
&PullAttributeSpec::Attribute(ref attr) => {
write!(f, "{}", attr)
},
}
}

View file

@ -116,7 +116,9 @@ fn test_simple_pull() {
assert_eq!(pulled, expected);
// Now test pull inside the query itself.
let query = r#"[:find ?hood (pull ?district [:district/name :district/region])
let query = r#"[:find ?hood (pull ?district [:db/id
:district/name :as :district/district
:district/region])
:where
(or [?hood :neighborhood/name "Beacon Hill"]
[?hood :neighborhood/name "Capitol Hill"])
@ -127,22 +129,24 @@ fn test_simple_pull() {
.into_rel_result()
.expect("results");
let beacon_district: Vec<(Keyword, TypedValue)> = vec![
(kw!(:district/name), "Greater Duwamish".into()),
(kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "se")).unwrap().into())
let beacon_district_pull: Vec<(Keyword, TypedValue)> = vec![
(kw!(:db/id), TypedValue::Ref(beacon_district)),
(kw!(:district/district), "Greater Duwamish".into()),
(kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "se")).unwrap().into()),
];
let beacon_district: StructuredMap = beacon_district.into();
let capitol_district: Vec<(Keyword, TypedValue)> = vec![
(kw!(:district/name), "East".into()),
(kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "e")).unwrap().into())
let beacon_district_pull: StructuredMap = beacon_district_pull.into();
let capitol_district_pull: Vec<(Keyword, TypedValue)> = vec![
(kw!(:db/id), TypedValue::Ref(capitol_district)),
(kw!(:district/district), "East".into()),
(kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "e")).unwrap().into()),
];
let capitol_district: StructuredMap = capitol_district.into();
let capitol_district_pull: StructuredMap = capitol_district_pull.into();
let expected = RelResult {
width: 2,
values: vec![
TypedValue::Ref(capitol).into(), capitol_district.into(),
TypedValue::Ref(beacon).into(), beacon_district.into(),
TypedValue::Ref(capitol).into(), capitol_district_pull.into(),
TypedValue::Ref(beacon).into(), beacon_district_pull.into(),
].into(),
};
assert_eq!(results, expected.clone());
@ -158,14 +162,19 @@ fn test_simple_pull() {
// Execute a scalar query where the body is constant.
// TODO: we shouldn't require `:where`; that makes this non-constant!
let query = r#"[:find (pull ?hood [:neighborhood/name]) . :in ?hood
let query = r#"[:find (pull ?hood [:db/id :as :neighborhood/id
:neighborhood/name]) .
:in ?hood
:where [?hood :neighborhood/district _]]"#;
let result = reader.q_once(query, QueryInputs::with_value_sequence(vec![(var!(?hood), TypedValue::Ref(beacon))]))
.into_scalar_result()
.expect("success")
.expect("result");
let expected: StructuredMap = vec![(kw!(:neighborhood/name), TypedValue::from("Beacon Hill"))].into();
let expected: StructuredMap = vec![
(kw!(:neighborhood/name), TypedValue::from("Beacon Hill")),
(kw!(:neighborhood/id), TypedValue::Ref(beacon)),
].into();
assert_eq!(result, expected.into());
// Collect the names and regions of all districts.
@ -207,6 +216,35 @@ fn test_simple_pull() {
let expected: Vec<Binding> = expected.into_iter().map(|m| m.into()).collect();
assert_eq!(results, expected);
// Pull fulltext.
let query = r#"[:find [(pull ?c [:community/name :community/category]) ...]
:where
[?c :community/name ?name]
[?c :community/type :community.type/website]
[(fulltext $ :community/category "food") [[?c ?cat]]]]"#;
let results = reader.q_once(query, None)
.into_coll_result()
.expect("result");
let expected: Vec<StructuredMap> = vec![
vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("Community Harvest of Southwest Seattle"))),
(kw!(:community/category), vec![Binding::Scalar(TypedValue::from("sustainable food"))].into())].into(),
vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("InBallard"))),
(kw!(:community/category), vec![Binding::Scalar(TypedValue::from("shopping")),
Binding::Scalar(TypedValue::from("food")),
Binding::Scalar(TypedValue::from("nightlife")),
Binding::Scalar(TypedValue::from("services"))].into())].into(),
vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("Seattle Chinatown Guide"))),
(kw!(:community/category), vec![Binding::Scalar(TypedValue::from("shopping")),
Binding::Scalar(TypedValue::from("food"))].into())].into(),
vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("University District Food Bank"))),
(kw!(:community/category), vec![Binding::Scalar(TypedValue::from("food bank"))].into())].into(),
];
let expected: Vec<Binding> = expected.into_iter().map(|m| m.into()).collect();
assert_eq!(results, expected);
}
// TEST: