From 01db9232b4c5ee82fc0427bbad3792e1c50cab85 Mon Sep 17 00:00:00 2001
From: Richard Newman <rnewman@twinql.com>
Date: Tue, 29 May 2018 16:45:53 +0200
Subject: [PATCH] Include namespace-separating solidus in NamespaceableName;
 improve type handling around ground (#713) r=nalexander

* Include the namespace-separating solidus in NamespaceableName.
* Use type annotations when deciding how to process ambiguous ground input.
* Include simple patterns in the type extraction phase of pattern application. (#705)
* Review comment.
* Add a test.
---
 edn/src/namespaceable_name.rs           | 31 ++++++++++++++++++-------
 edn/src/symbols.rs                      | 19 ++++++++-------
 query-algebrizer/src/clauses/convert.rs | 15 ++++++++----
 query-algebrizer/src/clauses/mod.rs     | 25 +++++++++++++++++---
 query-algebrizer/tests/ground.rs        | 13 +++++++++++
 5 files changed, 78 insertions(+), 25 deletions(-)

diff --git a/edn/src/namespaceable_name.rs b/edn/src/namespaceable_name.rs
index a0aa1d99..e072c7cc 100644
--- a/edn/src/namespaceable_name.rs
+++ b/edn/src/namespaceable_name.rs
@@ -32,20 +32,20 @@ use serde::ser::{
 #[derive(Clone, Eq, Hash, PartialEq)]
 pub struct NamespaceableName {
     // The bytes that make up the namespace followed directly by those
-    // that make up the name.
+    // that make up the name. If there is a namespace, a solidus ('/') is between
+    // the two parts.
     components: String,
 
-    // The index (in bytes) into `components` where the namespace ends and
-    // name begins.
+    // The index (in bytes) into `components` of the dividing solidus — the character
+    // between the namespace and the name.
     //
     // If this is zero, it means that this is _not_ a namespaced value!
     //
-    // Important: The following invariants around `boundary` must be maintained
-    // for memory safety.
+    // Important: The following invariants around `boundary` must be maintained:
     //
     // 1. `boundary` must always be less than or equal to `components.len()`.
-    // 2. `boundary` must be byte index that points to a character boundary,
-    //     and not point into the middle of a utf8 codepoint. That is,
+    // 2. `boundary` must be a byte index that points to a character boundary,
+    //     and not point into the middle of a UTF-8 codepoint. That is,
     //    `components.is_char_boundary(boundary)` must always be true.
     //
     // These invariants are enforced by `NamespaceableName::namespaced()`, and since
@@ -79,6 +79,7 @@ impl NamespaceableName {
         let mut dest = String::with_capacity(n.len() + ns.len());
 
         dest.push_str(ns);
+        dest.push('/');
         dest.push_str(n);
 
         let boundary = ns.len();
@@ -135,13 +136,19 @@ impl NamespaceableName {
         if self.boundary == 0 {
             &self.components
         } else {
-            &self.components[self.boundary..]
+            &self.components[(self.boundary + 1)..]
         }
     }
 
     #[inline]
     pub fn components<'a>(&'a self) -> (&'a str, &'a str) {
-        self.components.split_at(self.boundary)
+        if self.boundary > 0 {
+            (&self.components[0..self.boundary],
+             &self.components[(self.boundary + 1)..])
+        } else {
+            (&self.components[0..0],
+             &self.components)
+        }
     }
 }
 
@@ -177,6 +184,12 @@ impl fmt::Debug for NamespaceableName {
     }
 }
 
+impl fmt::Display for NamespaceableName {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str(&self.components)
+    }
+}
+
 // This is convoluted, but the basic idea is that since we don't want to rely on our input being
 // correct, we'll need to implement a custom serializer no matter what (e.g. we can't just
 // `derive(Deserialize)` since `unsafe` code depends on `self.boundary` being a valid index).
diff --git a/edn/src/symbols.rs b/edn/src/symbols.rs
index 9c2c2cb9..95ba3c81 100644
--- a/edn/src/symbols.rs
+++ b/edn/src/symbols.rs
@@ -8,7 +8,12 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
-use std::fmt::{Display, Formatter};
+use std::fmt::{
+    Display,
+    Formatter,
+    Write,
+};
+
 use namespaceable_name::NamespaceableName;
 
 #[macro_export]
@@ -264,7 +269,7 @@ impl Display for PlainSymbol {
     /// assert_eq!("baz", PlainSymbol::plain("baz").to_string());
     /// ```
     fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
-        write!(f, "{}", self.0)
+        self.0.fmt(f)
     }
 }
 
@@ -278,7 +283,7 @@ impl Display for NamespacedSymbol {
     /// assert_eq!("bar/baz", NamespacedSymbol::namespaced("bar", "baz").to_string());
     /// ```
     fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
-        write!(f, "{}/{}", self.namespace(), self.name())
+        self.0.fmt(f)
     }
 }
 
@@ -295,12 +300,8 @@ impl Display for Keyword {
     /// assert_eq!(":bar/baz", Keyword::namespaced("bar", "baz").to_reversed().to_reversed().to_string());
     /// ```
     fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
-        if self.0.is_namespaced() {
-            let (ns, name) = self.0.components();
-            write!(f, ":{}/{}", ns, name)
-        } else {
-            write!(f, ":{}", self.0.name())
-        }
+        f.write_char(':')?;
+        self.0.fmt(f)
     }
 }
 
diff --git a/query-algebrizer/src/clauses/convert.rs b/query-algebrizer/src/clauses/convert.rs
index 9869f2c2..9c9c1ab1 100644
--- a/query-algebrizer/src/clauses/convert.rs
+++ b/query-algebrizer/src/clauses/convert.rs
@@ -119,12 +119,19 @@ impl ConjoiningClauses {
             }));
         }
 
+        let constrained_types;
+        if let Some(required) = self.required_types.get(var) {
+            constrained_types = known_types.intersection(required);
+        } else {
+            constrained_types = known_types;
+        }
+
         match arg {
             // Longs are potentially ambiguous: they might be longs or entids.
             FnArg::EntidOrInteger(x) => {
                 match (ValueType::Ref.accommodates_integer(x),
-                       known_types.contains(ValueType::Ref),
-                       known_types.contains(ValueType::Long)) {
+                       constrained_types.contains(ValueType::Ref),
+                       constrained_types.contains(ValueType::Long)) {
                     (true, true, true) => {
                         // Ambiguous: this arg could be an entid or a long.
                         // We default to long.
@@ -159,8 +166,8 @@ impl ConjoiningClauses {
 
             // If you definitely want to look up an ident, do it before running the query.
             FnArg::IdentOrKeyword(x) => {
-                match (known_types.contains(ValueType::Ref),
-                       known_types.contains(ValueType::Keyword)) {
+                match (constrained_types.contains(ValueType::Ref),
+                       constrained_types.contains(ValueType::Keyword)) {
                     (true, true) => {
                         // Ambiguous: this could be a keyword or an ident.
                         // Default to keyword.
diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs
index 603c6dae..29c840bf 100644
--- a/query-algebrizer/src/clauses/mod.rs
+++ b/query-algebrizer/src/clauses/mod.rs
@@ -48,7 +48,6 @@ use mentat_query::{
     WhereClause,
 };
 
-#[cfg(test)]
 use mentat_query::{
     PatternNonValuePlace,
 };
@@ -1068,11 +1067,31 @@ impl ConjoiningClauses {
         Ok(())
     }
 
+    fn mark_as_ref(&mut self, pos: &PatternNonValuePlace) {
+        if let &PatternNonValuePlace::Variable(ref var) = pos {
+            self.constrain_var_to_type(var.clone(), ValueType::Ref)
+        }
+    }
+
     pub(crate) fn apply_clauses(&mut self, known: Known, where_clauses: Vec<WhereClause>) -> Result<()> {
         // We apply (top level) type predicates first as an optimization.
         for clause in where_clauses.iter() {
-            if let &WhereClause::TypeAnnotation(ref anno) = clause {
-                self.apply_type_anno(anno)?;
+            match clause {
+                &WhereClause::TypeAnnotation(ref anno) => {
+                    self.apply_type_anno(anno)?;
+                },
+
+                // Patterns are common, so let's grab as much type information from
+                // them as we can.
+                &WhereClause::Pattern(ref p) => {
+                    self.mark_as_ref(&p.entity);
+                    self.mark_as_ref(&p.attribute);
+                    self.mark_as_ref(&p.tx);
+                },
+
+                // TODO: if we wish we can include other kinds of clauses in this type
+                // extraction phase.
+                _ => {},
             }
         }
 
diff --git a/query-algebrizer/tests/ground.rs b/query-algebrizer/tests/ground.rs
index 150b031e..a07ed081 100644
--- a/query-algebrizer/tests/ground.rs
+++ b/query-algebrizer/tests/ground.rs
@@ -225,6 +225,19 @@ fn test_ground_tuple_infers_types() {
     assert_eq!(cc.bound_value(&Variable::from_valid_name("?v")), Some(TypedValue::Long(10)));
 }
 
+// We determine the types of variables in the query in an early first pass, and thus we can
+// safely use idents to name entities, including attributes.
+#[test]
+fn test_ground_coll_infers_attribute_types() {
+    let q = r#"[:find ?x
+                :where [(ground [:foo/age :foo/height]) [?a ...]]
+                       [?x ?a ?v]]"#;
+    let schema = prepopulated_schema();
+    let known = Known::for_schema(&schema);
+    let cc = alg(known, &q);
+    assert!(cc.empty_because.is_none());
+}
+
 #[test]
 fn test_ground_rel_infers_types() {
     let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?v]]]]"#;