From 01db9232b4c5ee82fc0427bbad3792e1c50cab85 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 29 May 2018 16:45:53 +0200 Subject: [PATCH] Include namespace-separating solidus in NamespaceableName; improve type handling around ground (#713) r=nalexander * Include the namespace-separating solidus in NamespaceableName. * Use type annotations when deciding how to process ambiguous ground input. * Include simple patterns in the type extraction phase of pattern application. (#705) * Review comment. * Add a test. --- edn/src/namespaceable_name.rs | 31 ++++++++++++++++++------- edn/src/symbols.rs | 19 ++++++++------- query-algebrizer/src/clauses/convert.rs | 15 ++++++++---- query-algebrizer/src/clauses/mod.rs | 25 +++++++++++++++++--- query-algebrizer/tests/ground.rs | 13 +++++++++++ 5 files changed, 78 insertions(+), 25 deletions(-) diff --git a/edn/src/namespaceable_name.rs b/edn/src/namespaceable_name.rs index a0aa1d99..e072c7cc 100644 --- a/edn/src/namespaceable_name.rs +++ b/edn/src/namespaceable_name.rs @@ -32,20 +32,20 @@ use serde::ser::{ #[derive(Clone, Eq, Hash, PartialEq)] pub struct NamespaceableName { // The bytes that make up the namespace followed directly by those - // that make up the name. + // that make up the name. If there is a namespace, a solidus ('/') is between + // the two parts. components: String, - // The index (in bytes) into `components` where the namespace ends and - // name begins. + // The index (in bytes) into `components` of the dividing solidus — the character + // between the namespace and the name. // // If this is zero, it means that this is _not_ a namespaced value! // - // Important: The following invariants around `boundary` must be maintained - // for memory safety. + // Important: The following invariants around `boundary` must be maintained: // // 1. `boundary` must always be less than or equal to `components.len()`. - // 2. `boundary` must be byte index that points to a character boundary, - // and not point into the middle of a utf8 codepoint. That is, + // 2. `boundary` must be a byte index that points to a character boundary, + // and not point into the middle of a UTF-8 codepoint. That is, // `components.is_char_boundary(boundary)` must always be true. // // These invariants are enforced by `NamespaceableName::namespaced()`, and since @@ -79,6 +79,7 @@ impl NamespaceableName { let mut dest = String::with_capacity(n.len() + ns.len()); dest.push_str(ns); + dest.push('/'); dest.push_str(n); let boundary = ns.len(); @@ -135,13 +136,19 @@ impl NamespaceableName { if self.boundary == 0 { &self.components } else { - &self.components[self.boundary..] + &self.components[(self.boundary + 1)..] } } #[inline] pub fn components<'a>(&'a self) -> (&'a str, &'a str) { - self.components.split_at(self.boundary) + if self.boundary > 0 { + (&self.components[0..self.boundary], + &self.components[(self.boundary + 1)..]) + } else { + (&self.components[0..0], + &self.components) + } } } @@ -177,6 +184,12 @@ impl fmt::Debug for NamespaceableName { } } +impl fmt::Display for NamespaceableName { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str(&self.components) + } +} + // This is convoluted, but the basic idea is that since we don't want to rely on our input being // correct, we'll need to implement a custom serializer no matter what (e.g. we can't just // `derive(Deserialize)` since `unsafe` code depends on `self.boundary` being a valid index). diff --git a/edn/src/symbols.rs b/edn/src/symbols.rs index 9c2c2cb9..95ba3c81 100644 --- a/edn/src/symbols.rs +++ b/edn/src/symbols.rs @@ -8,7 +8,12 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::fmt::{Display, Formatter}; +use std::fmt::{ + Display, + Formatter, + Write, +}; + use namespaceable_name::NamespaceableName; #[macro_export] @@ -264,7 +269,7 @@ impl Display for PlainSymbol { /// assert_eq!("baz", PlainSymbol::plain("baz").to_string()); /// ``` fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { - write!(f, "{}", self.0) + self.0.fmt(f) } } @@ -278,7 +283,7 @@ impl Display for NamespacedSymbol { /// assert_eq!("bar/baz", NamespacedSymbol::namespaced("bar", "baz").to_string()); /// ``` fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { - write!(f, "{}/{}", self.namespace(), self.name()) + self.0.fmt(f) } } @@ -295,12 +300,8 @@ impl Display for Keyword { /// assert_eq!(":bar/baz", Keyword::namespaced("bar", "baz").to_reversed().to_reversed().to_string()); /// ``` fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { - if self.0.is_namespaced() { - let (ns, name) = self.0.components(); - write!(f, ":{}/{}", ns, name) - } else { - write!(f, ":{}", self.0.name()) - } + f.write_char(':')?; + self.0.fmt(f) } } diff --git a/query-algebrizer/src/clauses/convert.rs b/query-algebrizer/src/clauses/convert.rs index 9869f2c2..9c9c1ab1 100644 --- a/query-algebrizer/src/clauses/convert.rs +++ b/query-algebrizer/src/clauses/convert.rs @@ -119,12 +119,19 @@ impl ConjoiningClauses { })); } + let constrained_types; + if let Some(required) = self.required_types.get(var) { + constrained_types = known_types.intersection(required); + } else { + constrained_types = known_types; + } + match arg { // Longs are potentially ambiguous: they might be longs or entids. FnArg::EntidOrInteger(x) => { match (ValueType::Ref.accommodates_integer(x), - known_types.contains(ValueType::Ref), - known_types.contains(ValueType::Long)) { + constrained_types.contains(ValueType::Ref), + constrained_types.contains(ValueType::Long)) { (true, true, true) => { // Ambiguous: this arg could be an entid or a long. // We default to long. @@ -159,8 +166,8 @@ impl ConjoiningClauses { // If you definitely want to look up an ident, do it before running the query. FnArg::IdentOrKeyword(x) => { - match (known_types.contains(ValueType::Ref), - known_types.contains(ValueType::Keyword)) { + match (constrained_types.contains(ValueType::Ref), + constrained_types.contains(ValueType::Keyword)) { (true, true) => { // Ambiguous: this could be a keyword or an ident. // Default to keyword. diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 603c6dae..29c840bf 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -48,7 +48,6 @@ use mentat_query::{ WhereClause, }; -#[cfg(test)] use mentat_query::{ PatternNonValuePlace, }; @@ -1068,11 +1067,31 @@ impl ConjoiningClauses { Ok(()) } + fn mark_as_ref(&mut self, pos: &PatternNonValuePlace) { + if let &PatternNonValuePlace::Variable(ref var) = pos { + self.constrain_var_to_type(var.clone(), ValueType::Ref) + } + } + pub(crate) fn apply_clauses(&mut self, known: Known, where_clauses: Vec) -> Result<()> { // We apply (top level) type predicates first as an optimization. for clause in where_clauses.iter() { - if let &WhereClause::TypeAnnotation(ref anno) = clause { - self.apply_type_anno(anno)?; + match clause { + &WhereClause::TypeAnnotation(ref anno) => { + self.apply_type_anno(anno)?; + }, + + // Patterns are common, so let's grab as much type information from + // them as we can. + &WhereClause::Pattern(ref p) => { + self.mark_as_ref(&p.entity); + self.mark_as_ref(&p.attribute); + self.mark_as_ref(&p.tx); + }, + + // TODO: if we wish we can include other kinds of clauses in this type + // extraction phase. + _ => {}, } } diff --git a/query-algebrizer/tests/ground.rs b/query-algebrizer/tests/ground.rs index 150b031e..a07ed081 100644 --- a/query-algebrizer/tests/ground.rs +++ b/query-algebrizer/tests/ground.rs @@ -225,6 +225,19 @@ fn test_ground_tuple_infers_types() { assert_eq!(cc.bound_value(&Variable::from_valid_name("?v")), Some(TypedValue::Long(10))); } +// We determine the types of variables in the query in an early first pass, and thus we can +// safely use idents to name entities, including attributes. +#[test] +fn test_ground_coll_infers_attribute_types() { + let q = r#"[:find ?x + :where [(ground [:foo/age :foo/height]) [?a ...]] + [?x ?a ?v]]"#; + let schema = prepopulated_schema(); + let known = Known::for_schema(&schema); + let cc = alg(known, &q); + assert!(cc.empty_because.is_none()); +} + #[test] fn test_ground_rel_infers_types() { let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?v]]]]"#;