Include namespace-separating solidus in NamespaceableName; improve type handling around ground (#713) r=nalexander

* Include the namespace-separating solidus in NamespaceableName. * Use type annotations when deciding how to process ambiguous ground input. * Include simple patterns in the type extraction phase of pattern application. (#705) * Review comment. * Add a test.
2018-05-29 16:45:53 +02:00 · 2018-05-29 16:45:53 +02:00 · 01db9232b4
commit 01db9232b4
parent e4447927c7
5 changed files with 78 additions and 25 deletions
--- a/edn/src/namespaceable_name.rs
+++ b/edn/src/namespaceable_name.rs
@ -32,20 +32,20 @@ use serde::ser::{
 #[derive(Clone, Eq, Hash, PartialEq)]
 pub struct NamespaceableName {
    // The bytes that make up the namespace followed directly by those
-    // that make up the name.
+    // that make up the name. If there is a namespace, a solidus ('/') is between
+    // the two parts.
    components: String,

-    // The index (in bytes) into `components` where the namespace ends and
-    // name begins.
+    // The index (in bytes) into `components` of the dividing solidus — the character
+    // between the namespace and the name.
    //
    // If this is zero, it means that this is _not_ a namespaced value!
    //
-    // Important: The following invariants around `boundary` must be maintained
-    // for memory safety.
+    // Important: The following invariants around `boundary` must be maintained:
    //
    // 1. `boundary` must always be less than or equal to `components.len()`.
-    // 2. `boundary` must be byte index that points to a character boundary,
-    //     and not point into the middle of a utf8 codepoint. That is,
+    // 2. `boundary` must be a byte index that points to a character boundary,
+    //     and not point into the middle of a UTF-8 codepoint. That is,
    //    `components.is_char_boundary(boundary)` must always be true.
    //
    // These invariants are enforced by `NamespaceableName::namespaced()`, and since
@ -79,6 +79,7 @@ impl NamespaceableName {
        let mut dest = String::with_capacity(n.len() + ns.len());

        dest.push_str(ns);
+        dest.push('/');
        dest.push_str(n);

        let boundary = ns.len();
@ -135,13 +136,19 @@ impl NamespaceableName {
        if self.boundary == 0 {
            &self.components
        } else {
-            &self.components[self.boundary..]
+            &self.components[(self.boundary + 1)..]
        }
    }

    #[inline]
    pub fn components<'a>(&'a self) -> (&'a str, &'a str) {
-        self.components.split_at(self.boundary)
+        if self.boundary > 0 {
+            (&self.components[0..self.boundary],
+             &self.components[(self.boundary + 1)..])
+        } else {
+            (&self.components[0..0],
+             &self.components)
+        }
    }
 }

@ -177,6 +184,12 @@ impl fmt::Debug for NamespaceableName {
    }
 }

+impl fmt::Display for NamespaceableName {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str(&self.components)
+    }
+}
+
 // This is convoluted, but the basic idea is that since we don't want to rely on our input being
 // correct, we'll need to implement a custom serializer no matter what (e.g. we can't just
 // `derive(Deserialize)` since `unsafe` code depends on `self.boundary` being a valid index).
--- a/edn/src/symbols.rs
+++ b/edn/src/symbols.rs
@ -8,7 +8,12 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

-use std::fmt::{Display, Formatter};
+use std::fmt::{
+    Display,
+    Formatter,
+    Write,
+};
+
 use namespaceable_name::NamespaceableName;

 #[macro_export]
@ -264,7 +269,7 @@ impl Display for PlainSymbol {
    /// assert_eq!("baz", PlainSymbol::plain("baz").to_string());
    /// ```
    fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
-        write!(f, "{}", self.0)
+        self.0.fmt(f)
    }
 }

@ -278,7 +283,7 @@ impl Display for NamespacedSymbol {
    /// assert_eq!("bar/baz", NamespacedSymbol::namespaced("bar", "baz").to_string());
    /// ```
    fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
-        write!(f, "{}/{}", self.namespace(), self.name())
+        self.0.fmt(f)
    }
 }

@ -295,12 +300,8 @@ impl Display for Keyword {
    /// assert_eq!(":bar/baz", Keyword::namespaced("bar", "baz").to_reversed().to_reversed().to_string());
    /// ```
    fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
-        if self.0.is_namespaced() {
-            let (ns, name) = self.0.components();
-            write!(f, ":{}/{}", ns, name)
-        } else {
-            write!(f, ":{}", self.0.name())
-        }
+        f.write_char(':')?;
+        self.0.fmt(f)
    }
 }

--- a/query-algebrizer/src/clauses/convert.rs
+++ b/query-algebrizer/src/clauses/convert.rs
@ -119,12 +119,19 @@ impl ConjoiningClauses {
            }));
        }

+        let constrained_types;
+        if let Some(required) = self.required_types.get(var) {
+            constrained_types = known_types.intersection(required);
+        } else {
+            constrained_types = known_types;
+        }
+
        match arg {
            // Longs are potentially ambiguous: they might be longs or entids.
            FnArg::EntidOrInteger(x) => {
                match (ValueType::Ref.accommodates_integer(x),
-                       known_types.contains(ValueType::Ref),
-                       known_types.contains(ValueType::Long)) {
+                       constrained_types.contains(ValueType::Ref),
+                       constrained_types.contains(ValueType::Long)) {
                    (true, true, true) => {
                        // Ambiguous: this arg could be an entid or a long.
                        // We default to long.
@ -159,8 +166,8 @@ impl ConjoiningClauses {

            // If you definitely want to look up an ident, do it before running the query.
            FnArg::IdentOrKeyword(x) => {
-                match (known_types.contains(ValueType::Ref),
-                       known_types.contains(ValueType::Keyword)) {
+                match (constrained_types.contains(ValueType::Ref),
+                       constrained_types.contains(ValueType::Keyword)) {
                    (true, true) => {
                        // Ambiguous: this could be a keyword or an ident.
                        // Default to keyword.
--- a/query-algebrizer/src/clauses/mod.rs
+++ b/query-algebrizer/src/clauses/mod.rs
@ -48,7 +48,6 @@ use mentat_query::{
    WhereClause,
 };

-#[cfg(test)]
 use mentat_query::{
    PatternNonValuePlace,
 };
@ -1068,11 +1067,31 @@ impl ConjoiningClauses {
        Ok(())
    }

+    fn mark_as_ref(&mut self, pos: &PatternNonValuePlace) {
+        if let &PatternNonValuePlace::Variable(ref var) = pos {
+            self.constrain_var_to_type(var.clone(), ValueType::Ref)
+        }
+    }
+
    pub(crate) fn apply_clauses(&mut self, known: Known, where_clauses: Vec<WhereClause>) -> Result<()> {
        // We apply (top level) type predicates first as an optimization.
        for clause in where_clauses.iter() {
-            if let &WhereClause::TypeAnnotation(ref anno) = clause {
-                self.apply_type_anno(anno)?;
+            match clause {
+                &WhereClause::TypeAnnotation(ref anno) => {
+                    self.apply_type_anno(anno)?;
+                },
+
+                // Patterns are common, so let's grab as much type information from
+                // them as we can.
+                &WhereClause::Pattern(ref p) => {
+                    self.mark_as_ref(&p.entity);
+                    self.mark_as_ref(&p.attribute);
+                    self.mark_as_ref(&p.tx);
+                },
+
+                // TODO: if we wish we can include other kinds of clauses in this type
+                // extraction phase.
+                _ => {},
            }
        }

--- a/query-algebrizer/tests/ground.rs
+++ b/query-algebrizer/tests/ground.rs
@ -225,6 +225,19 @@ fn test_ground_tuple_infers_types() {
    assert_eq!(cc.bound_value(&Variable::from_valid_name("?v")), Some(TypedValue::Long(10)));
 }

+// We determine the types of variables in the query in an early first pass, and thus we can
+// safely use idents to name entities, including attributes.
+#[test]
+fn test_ground_coll_infers_attribute_types() {
+    let q = r#"[:find ?x
+                :where [(ground [:foo/age :foo/height]) [?a ...]]
+                       [?x ?a ?v]]"#;
+    let schema = prepopulated_schema();
+    let known = Known::for_schema(&schema);
+    let cc = alg(known, &q);
+    assert!(cc.empty_because.is_none());
+}
+
 #[test]
 fn test_ground_rel_infers_types() {
    let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?v]]]]"#;