From 60cb5d2432a17e2a462bdf414d62d30bb69d5f01 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Sun, 13 May 2018 14:15:36 -0700 Subject: [PATCH] Pull improvements (#682) r=nalexander * Parse and handle aliased pull attributes. * Allow :db/id to be mentioned as a pull attribute. * Clean up comment. * Remove unused function. --- core/src/types.rs | 6 ++++ query-parser/src/parse.rs | 32 ++++++++++++----- query-pull/src/errors.rs | 5 +++ query-pull/src/lib.rs | 73 +++++++++++++++++++++++++++++---------- query/src/lib.rs | 37 ++++++++++++++++---- tests/pull.rs | 64 +++++++++++++++++++++++++++------- 6 files changed, 170 insertions(+), 47 deletions(-) diff --git a/core/src/types.rs b/core/src/types.rs index 01690b34..ad426a12 100644 --- a/core/src/types.rs +++ b/core/src/types.rs @@ -315,6 +315,12 @@ impl From for Binding { } } +impl From> for Binding { + fn from(value: Vec) -> Self { + Binding::Vec(ValueRc::new(value)) + } +} + impl Binding { pub fn val(self) -> Option { match self { diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 9cbaf4b2..aabb7f64 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -67,6 +67,7 @@ use self::mentat_query::{ Order, OrJoin, OrWhereClause, + NamedPullAttribute, NotJoin, Pattern, PatternNonValuePlace, @@ -191,6 +192,7 @@ def_parser!(Query, order, Order, { def_matches_plain_symbol!(Query, the, "the"); def_matches_plain_symbol!(Query, pull, "pull"); def_matches_plain_symbol!(Query, wildcard, "*"); +def_matches_keyword!(Query, alias_as, "as"); pub struct Where<'a>(std::marker::PhantomData<&'a ()>); @@ -303,11 +305,19 @@ def_parser!(Query, aggregate, Aggregate, { }) }); +def_parser!(Query, pull_concrete_attribute_ident, PullConcreteAttribute, { + forward_keyword().map(|k| PullConcreteAttribute::Ident(::std::rc::Rc::new(k.clone()))) +}); + def_parser!(Query, pull_concrete_attribute, PullAttributeSpec, { - forward_keyword().map(|k| + (Query::pull_concrete_attribute_ident(), + optional(try(Query::alias_as().with(forward_keyword().map(|alias| ::std::rc::Rc::new(alias.clone())))))) + .map(|(attribute, alias)| PullAttributeSpec::Attribute( - PullConcreteAttribute::Ident( - ::std::rc::Rc::new(k.clone())))) + NamedPullAttribute { + attribute, + alias: alias, + })) }); def_parser!(Query, pull_wildcard_attribute, PullAttributeSpec, { @@ -1205,23 +1215,27 @@ mod test { let foo_bar = ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "bar")); let foo_baz = ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "baz")); + let foo_horse = ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "horse")); assert_edn_parses_to!(Query::pull_concrete_attribute, ":foo/bar", PullAttributeSpec::Attribute( - PullConcreteAttribute::Ident(foo_bar.clone()))); + PullConcreteAttribute::Ident(foo_bar.clone()).into())); assert_edn_parses_to!(Query::pull_attribute, ":foo/bar", PullAttributeSpec::Attribute( - PullConcreteAttribute::Ident(foo_bar.clone()))); + PullConcreteAttribute::Ident(foo_bar.clone()).into())); assert_edn_parses_to!(Find::elem, - "(pull ?v [:foo/bar :foo/baz])", + "(pull ?v [:foo/bar :as :foo/horse, :foo/baz])", Element::Pull(Pull { var: Variable::from_valid_name("?v"), patterns: vec![ PullAttributeSpec::Attribute( - PullConcreteAttribute::Ident(foo_bar.clone())), + NamedPullAttribute { + attribute: PullConcreteAttribute::Ident(foo_bar.clone()), + alias: Some(foo_horse), + }), PullAttributeSpec::Attribute( - PullConcreteAttribute::Ident(foo_baz.clone())), + PullConcreteAttribute::Ident(foo_baz.clone()).into()), ], })); assert_parse_failure_contains!(Find::elem, @@ -1242,7 +1256,7 @@ mod test { PullAttributeSpec::Attribute( PullConcreteAttribute::Ident( ::std::rc::Rc::new(edn::Keyword::namespaced("foo", "bar")) - ) + ).into() ), ] })]), where_clauses: vec![ diff --git a/query-pull/src/errors.rs b/query-pull/src/errors.rs index f0e747e6..94c38426 100644 --- a/query-pull/src/errors.rs +++ b/query-pull/src/errors.rs @@ -22,6 +22,11 @@ error_chain! { description("unnamed attribute") display("attribute {:?} has no name", id) } + + RepeatedDbId { + description(":db/id repeated") + display(":db/id repeated") + } } links { diff --git a/query-pull/src/lib.rs b/query-pull/src/lib.rs index 7138ddc5..1b2d7804 100644 --- a/query-pull/src/lib.rs +++ b/query-pull/src/lib.rs @@ -79,18 +79,21 @@ use std::iter::{ }; use mentat_core::{ + Binding, Cloned, Entid, HasSchema, Keyword, Schema, StructuredMap, + TypedValue, ValueRc, }; use mentat_db::cache; use mentat_query::{ + NamedPullAttribute, PullAttributeSpec, PullConcreteAttribute, }; @@ -110,7 +113,7 @@ pub fn pull_attributes_for_entity(schema: &Schema, attributes: A) -> Result where A: IntoIterator { let attrs = attributes.into_iter() - .map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e))) + .map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e).into())) .collect(); Puller::prepare(schema, attrs)? .pull(schema, db, once(entity)) @@ -130,7 +133,7 @@ pub fn pull_attributes_for_entities(schema: &Schema, where E: IntoIterator, A: IntoIterator { let attrs = attributes.into_iter() - .map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e))) + .map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e).into())) .collect(); Puller::prepare(schema, attrs)? .pull(schema, db, entities) @@ -142,16 +145,16 @@ pub struct Puller { // The range is the set of aliases to use in the output. attributes: BTreeMap>, attribute_spec: cache::AttributeSpec, + + // If this is set, each pulled entity is contributed to its own output map, labeled with this + // keyword. This is a divergence from Datomic, which has no types by which to differentiate a + // long from an entity ID, and thus represents all entities in pull as, _e.g._, `{:db/id 1234}`. + // Mentat can use `TypedValue::Ref(1234)`, but it's sometimes convenient to fetch the entity ID + // itself as part of a pull expression: `{:person 1234, :person/name "Peter"}`. + db_id_alias: Option>, } impl Puller { - pub fn prepare_simple_attributes(schema: &Schema, attributes: Vec) -> Result { - Puller::prepare(schema, - attributes.into_iter() - .map(|e| PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(e))) - .collect()) - } - pub fn prepare(schema: &Schema, attributes: Vec) -> Result { // TODO: eventually this entry point will handle aliasing and that kind of // thing. For now it's just a convenience. @@ -165,6 +168,9 @@ impl Puller { let mut names: BTreeMap> = Default::default(); let mut attrs: BTreeSet = Default::default(); + let db_id = ::std::rc::Rc::new(Keyword::namespaced("db", "id")); + let mut db_id_alias = None; + for attr in attributes.iter() { match attr { &PullAttributeSpec::Wildcard => { @@ -175,22 +181,42 @@ impl Puller { } break; }, - &PullAttributeSpec::Attribute(PullConcreteAttribute::Ident(ref i)) => { - if let Some(entid) = schema.get_entid(i) { - names.insert(entid.into(), i.to_value_rc()); - attrs.insert(entid.into()); + &PullAttributeSpec::Attribute(NamedPullAttribute { + ref attribute, + ref alias, + }) => { + let alias = alias.as_ref() + .map(|ref r| r.to_value_rc()); + match attribute { + // Handle :db/id. + &PullConcreteAttribute::Ident(ref i) if i.as_ref() == db_id.as_ref() => { + // We only allow :db/id once. + if db_id_alias.is_some() { + bail!(ErrorKind::RepeatedDbId); + } + db_id_alias = Some(alias.unwrap_or_else(|| db_id.to_value_rc())); + }, + &PullConcreteAttribute::Ident(ref i) => { + if let Some(entid) = schema.get_entid(i) { + let name = alias.unwrap_or_else(|| i.to_value_rc()); + names.insert(entid.into(), name); + attrs.insert(entid.into()); + } + }, + &PullConcreteAttribute::Entid(ref entid) => { + let name = alias.map(Ok).unwrap_or_else(|| lookup_name(entid))?; + names.insert(*entid, name); + attrs.insert(*entid); + }, } }, - &PullAttributeSpec::Attribute(PullConcreteAttribute::Entid(ref entid)) => { - names.insert(*entid, lookup_name(entid)?); - attrs.insert(*entid); - }, } } Ok(Puller { attributes: names, attribute_spec: cache::AttributeSpec::specified(&attrs, schema), + db_id_alias, }) } @@ -205,9 +231,7 @@ impl Puller { // - Recursing. (TODO: we'll need AttributeCaches to not overwrite in case of recursion! And // ideally not do excess work when some entity/attribute pairs are known.) // - Building a structure by walking the pull expression with the caches. - // TODO: aliases. // TODO: limits. - // TODO: fts. // Build a cache for these attributes and entities. // TODO: use the store's existing cache! @@ -222,6 +246,17 @@ impl Puller { // TODO: should we walk `e` then `a`, or `a` then `e`? Possibly the right answer // is just to collect differently! let mut maps = BTreeMap::new(); + + // Collect :db/id if requested. + if let Some(ref alias) = self.db_id_alias { + for e in entities.iter() { + let mut r = maps.entry(*e) + .or_insert(ValueRc::new(StructuredMap::default())); + let mut m = ValueRc::get_mut(r).unwrap(); + m.insert(alias.clone(), Binding::Scalar(TypedValue::Ref(*e))); + } + } + for (name, cache) in self.attributes.iter().filter_map(|(a, name)| caches.forward_attribute_cache_for_attribute(schema, *a) .map(|cache| (name.clone(), cache))) { diff --git a/query/src/lib.rs b/query/src/lib.rs index 488c4f37..7c692949 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -498,14 +498,28 @@ pub enum PullConcreteAttribute { Entid(i64), } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct NamedPullAttribute { + pub attribute: PullConcreteAttribute, + pub alias: Option>, +} + +impl From for NamedPullAttribute { + fn from(a: PullConcreteAttribute) -> Self { + NamedPullAttribute { + attribute: a, + alias: None, + } + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub enum PullAttributeSpec { Wildcard, - Attribute(PullConcreteAttribute), + Attribute(NamedPullAttribute), // PullMapSpec(Vec<…>), - // AttributeWithOpts(PullConcreteAttribute, …), - // LimitedAttribute(PullConcreteAttribute, u64), // Limit nil => Attribute instead. - // DefaultedAttribute(PullConcreteAttribute, PullDefaultValue), + // LimitedAttribute(NamedPullAttribute, u64), // Limit nil => Attribute instead. + // DefaultedAttribute(NamedPullAttribute, PullDefaultValue), } impl std::fmt::Display for PullConcreteAttribute { @@ -521,14 +535,25 @@ impl std::fmt::Display for PullConcreteAttribute { } } +impl std::fmt::Display for NamedPullAttribute { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + if let &Some(ref alias) = &self.alias { + write!(f, "{} :as {}", self.attribute, alias) + } else { + write!(f, "{}", self.attribute) + } + } +} + + impl std::fmt::Display for PullAttributeSpec { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { &PullAttributeSpec::Wildcard => { write!(f, "*") }, - &PullAttributeSpec::Attribute(ref a) => { - write!(f, "{}", a) + &PullAttributeSpec::Attribute(ref attr) => { + write!(f, "{}", attr) }, } } diff --git a/tests/pull.rs b/tests/pull.rs index e8eb6468..c4923d09 100644 --- a/tests/pull.rs +++ b/tests/pull.rs @@ -116,7 +116,9 @@ fn test_simple_pull() { assert_eq!(pulled, expected); // Now test pull inside the query itself. - let query = r#"[:find ?hood (pull ?district [:district/name :district/region]) + let query = r#"[:find ?hood (pull ?district [:db/id + :district/name :as :district/district + :district/region]) :where (or [?hood :neighborhood/name "Beacon Hill"] [?hood :neighborhood/name "Capitol Hill"]) @@ -127,22 +129,24 @@ fn test_simple_pull() { .into_rel_result() .expect("results"); - let beacon_district: Vec<(Keyword, TypedValue)> = vec![ - (kw!(:district/name), "Greater Duwamish".into()), - (kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "se")).unwrap().into()) + let beacon_district_pull: Vec<(Keyword, TypedValue)> = vec![ + (kw!(:db/id), TypedValue::Ref(beacon_district)), + (kw!(:district/district), "Greater Duwamish".into()), + (kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "se")).unwrap().into()), ]; - let beacon_district: StructuredMap = beacon_district.into(); - let capitol_district: Vec<(Keyword, TypedValue)> = vec![ - (kw!(:district/name), "East".into()), - (kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "e")).unwrap().into()) + let beacon_district_pull: StructuredMap = beacon_district_pull.into(); + let capitol_district_pull: Vec<(Keyword, TypedValue)> = vec![ + (kw!(:db/id), TypedValue::Ref(capitol_district)), + (kw!(:district/district), "East".into()), + (kw!(:district/region), schema.get_entid(&Keyword::namespaced("region", "e")).unwrap().into()), ]; - let capitol_district: StructuredMap = capitol_district.into(); + let capitol_district_pull: StructuredMap = capitol_district_pull.into(); let expected = RelResult { width: 2, values: vec![ - TypedValue::Ref(capitol).into(), capitol_district.into(), - TypedValue::Ref(beacon).into(), beacon_district.into(), + TypedValue::Ref(capitol).into(), capitol_district_pull.into(), + TypedValue::Ref(beacon).into(), beacon_district_pull.into(), ].into(), }; assert_eq!(results, expected.clone()); @@ -158,14 +162,19 @@ fn test_simple_pull() { // Execute a scalar query where the body is constant. // TODO: we shouldn't require `:where`; that makes this non-constant! - let query = r#"[:find (pull ?hood [:neighborhood/name]) . :in ?hood + let query = r#"[:find (pull ?hood [:db/id :as :neighborhood/id + :neighborhood/name]) . + :in ?hood :where [?hood :neighborhood/district _]]"#; let result = reader.q_once(query, QueryInputs::with_value_sequence(vec![(var!(?hood), TypedValue::Ref(beacon))])) .into_scalar_result() .expect("success") .expect("result"); - let expected: StructuredMap = vec![(kw!(:neighborhood/name), TypedValue::from("Beacon Hill"))].into(); + let expected: StructuredMap = vec![ + (kw!(:neighborhood/name), TypedValue::from("Beacon Hill")), + (kw!(:neighborhood/id), TypedValue::Ref(beacon)), + ].into(); assert_eq!(result, expected.into()); // Collect the names and regions of all districts. @@ -207,6 +216,35 @@ fn test_simple_pull() { let expected: Vec = expected.into_iter().map(|m| m.into()).collect(); assert_eq!(results, expected); + // Pull fulltext. + let query = r#"[:find [(pull ?c [:community/name :community/category]) ...] + :where + [?c :community/name ?name] + [?c :community/type :community.type/website] + [(fulltext $ :community/category "food") [[?c ?cat]]]]"#; + let results = reader.q_once(query, None) + .into_coll_result() + .expect("result"); + let expected: Vec = vec![ + vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("Community Harvest of Southwest Seattle"))), + (kw!(:community/category), vec![Binding::Scalar(TypedValue::from("sustainable food"))].into())].into(), + + vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("InBallard"))), + (kw!(:community/category), vec![Binding::Scalar(TypedValue::from("shopping")), + Binding::Scalar(TypedValue::from("food")), + Binding::Scalar(TypedValue::from("nightlife")), + Binding::Scalar(TypedValue::from("services"))].into())].into(), + + vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("Seattle Chinatown Guide"))), + (kw!(:community/category), vec![Binding::Scalar(TypedValue::from("shopping")), + Binding::Scalar(TypedValue::from("food"))].into())].into(), + + vec![(kw!(:community/name), Binding::Scalar(TypedValue::from("University District Food Bank"))), + (kw!(:community/category), vec![Binding::Scalar(TypedValue::from("food bank"))].into())].into(), + ]; + + let expected: Vec = expected.into_iter().map(|m| m.into()).collect(); + assert_eq!(results, expected); } // TEST: