Allow required_types to constrain a variable be be one of several types.

2018-01-18 20:50:20 -05:00 · 2018-01-18 20:50:20 -05:00 · c9e2ff8146
commit c9e2ff8146
parent f3dc922571
12 changed files with 211 additions and 74 deletions
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@ -412,6 +412,12 @@ impl ValueTypeSet {
        ValueTypeSet(self.0.intersection(other.0))
    }

+    /// Returns the set difference between `self` and `other`, which is the
+    /// set of items in `self` that are not in `other`.
+    pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet {
+        ValueTypeSet(self.0 - other.0)
+    }
+
    /// Return an arbitrary type that's part of this set.
    /// For a set containing a single type, this will be that type.
    pub fn exemplar(&self) -> Option<ValueType> {
@ -422,6 +428,11 @@ impl ValueTypeSet {
        self.0.is_subset(&other.0)
    }

+    /// Returns true if `self` and `other` contain no items in common.
+    pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool {
+        self.0.is_disjoint(&other.0)
+    }
+
    pub fn contains(&self, vt: ValueType) -> bool {
        self.0.contains(&vt)
    }
@ -433,6 +444,10 @@ impl ValueTypeSet {
    pub fn is_unit(&self) -> bool {
        self.0.len() == 1
    }
+
+    pub fn iter(&self) -> ::enum_set::Iter<ValueType> {
+        self.0.iter()
+    }
 }

 impl IntoIterator for ValueTypeSet {
--- a/query-algebrizer/src/clauses/mod.rs
+++ b/query-algebrizer/src/clauses/mod.rs
@ -218,7 +218,7 @@ pub struct ConjoiningClauses {
    pub extracted_types: BTreeMap<Variable, QualifiedAlias>,

    /// Map of variables to the set of type requirements we have for them.
-    required_types: BTreeMap<Variable, ValueType>,
+    required_types: BTreeMap<Variable, ValueTypeSet>,
 }

 impl PartialEq for ConjoiningClauses {
@ -551,17 +551,27 @@ impl ConjoiningClauses {
        }
    }

-    pub fn add_type_requirement(&mut self, var: Variable, ty: ValueType) {
-        if let Some(existing) = self.required_types.insert(var.clone(), ty) {
-            // If we already have a required type for `var`, we're empty.
-            if existing != ty {
-                self.mark_known_empty(EmptyBecause::TypeMismatch {
-                    var: var.clone(),
-                    existing: ValueTypeSet::of_one(existing),
-                    desired: ValueTypeSet::of_one(ty)
-                });
-            }
+    /// Require that `var` be one of the types in `types`. If any existing
+    /// type requirements exist for `var`, the requirement after this
+    /// function returns will be the intersection of the requested types and
+    /// the type requirements in place prior to calling `add_type_requirement`.
+    ///
+    /// If the intersection will leave the variable so that it cannot be any
+    /// type, we'll call mark_known_empty.
+    pub fn add_type_requirement(&mut self, var: Variable, types: ValueTypeSet) {
+        let existing = self.required_types.get(&var).cloned().unwrap_or(ValueTypeSet::any());
+
+        // We have an existing requirement. The new requirement will be
+        // the intersection, but we'll mark_known_empty if that's empty.
+        let intersection = types.intersection(&existing);
+        if intersection.is_empty() {
+            self.mark_known_empty(EmptyBecause::TypeMismatch {
+                var: var.clone(),
+                existing: existing,
+                desired: types,
+            });
        }
+        self.required_types.insert(var, intersection);
    }

    /// Like `constrain_var_to_type` but in reverse: this expands the set of types
@ -872,34 +882,56 @@ impl ConjoiningClauses {
    }

    pub fn process_required_types(&mut self) -> Result<()> {
+        if self.empty_because.is_some() {
+            return Ok(())
+        }
        // We can't call `mark_known_empty` inside the loop since it would be a
        // mutable borrow on self while we're iterating over `self.required_types`.
        // Doing it like this avoids needing to copy `self.required_types`.
        let mut empty_because: Option<EmptyBecause> = None;
-        for (var, &ty) in self.required_types.iter() {
-            if let Some(&already_known) = self.known_types.get(var) {
-                if already_known.exemplar() == Some(ty) {
-                    // If we're already certain the type and the constraint are
-                    // the same, then there's no need to constrain anything.
-                    continue;
-                }
-                if !already_known.contains(ty) && empty_because.is_none() {
+        for (var, types) in self.required_types.iter() {
+            if let Some(already_known) = self.known_types.get(var) {
+                if already_known.is_disjoint(types) {
                    // If we know the constraint can't be one of the types
                    // the variable could take, then we know we're empty.
                    empty_because = Some(EmptyBecause::TypeMismatch {
                        var: var.clone(),
-                        existing: already_known,
-                        desired: ValueTypeSet::of_one(ty)
+                        existing: already_known.clone(),
+                        desired: types.clone(),
                    });
                    break;
                }
+                if already_known.is_subset(types) {
+                    // TODO: I'm not convinced that we can do nothing here.
+                    //
+                    // Consider `[:find ?x ?v :where [_ _ ?v] [(> ?v 10)] [?x :foo/long ?v]]`.
+                    //
+                    // That will produce SQL like:
+                    //
+                    // ```
+                    // SELECT datoms01.e AS `?x`, datoms00.v AS `?v`
+                    // FROM datoms datoms00, datoms01
+                    // WHERE datoms00.v > 10
+                    //  AND datoms01.v = datoms00.v
+                    //  AND datoms01.value_type_tag = datoms00.value_type_tag
+                    //  AND datoms01.a = 65537
+                    // ```
+                    //
+                    // Which is not optimal — the left side of the join will
+                    // produce lots of spurious bindings for datoms00.v.
+                    //
+                    // See https://github.com/mozilla/mentat/issues/520, and
+                    // https://github.com/mozilla/mentat/issues/293.
+                    continue;
+                }
            }
+
            let qa = self.extracted_types
-                .get(&var)
-                .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
-            self.wheres.add_intersection(ColumnConstraint::HasType {
+                         .get(&var)
+                         .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
+            self.wheres.add_intersection(ColumnConstraint::HasTypes {
                value: qa.0.clone(),
-                value_type: ty,
+                value_types: *types,
                strict: true,
            });
        }
--- a/query-algebrizer/src/clauses/not.rs
+++ b/query-algebrizer/src/clauses/not.rs
@ -53,10 +53,21 @@ impl ConjoiningClauses {
            template.apply_clause(&schema, clause)?;
        }

-        template.expand_column_bindings();
-        template.prune_extracted_types();
-        template.process_required_types()?;
+        if template.is_known_empty() {
+            return Ok(());
+        }

+        template.expand_column_bindings();
+        if template.is_known_empty() {
+            return Ok(());
+        }
+
+        template.prune_extracted_types();
+        if template.is_known_empty() {
+            return Ok(());
+        }
+
+        template.process_required_types()?;
        if template.is_known_empty() {
            return Ok(());
        }
--- a/query-algebrizer/src/clauses/predicate.rs
+++ b/query-algebrizer/src/clauses/predicate.rs
@ -53,7 +53,7 @@ impl ConjoiningClauses {
    /// There are several kinds of predicates in our Datalog:
    /// - A limited set of binary comparison operators: < > <= >= !=.
    ///   These are converted into SQLite binary comparisons and some type constraints.
-    /// - A set of type requirements constraining their argument to be a specific ValueType
+    /// - A set of type requirements constraining their argument to be a specific ValueType.
    /// - In the future, some predicates that are implemented via function calls in SQLite.
    ///
    /// At present we have implemented only the five built-in comparison binary operators.
@ -83,7 +83,7 @@ impl ConjoiningClauses {
        let mut args = pred.args.into_iter();

        if let FnArg::Variable(v) = args.next().unwrap() {
-            self.add_type_requirement(v, ty);
+            self.add_type_requirement(v, ValueTypeSet::of_one(ty));
            Ok(())
        } else {
            bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0))
--- a/query-algebrizer/src/types.rs
+++ b/query-algebrizer/src/types.rs
@ -334,9 +334,9 @@ pub enum ColumnConstraint {
        left: QueryValue,
        right: QueryValue,
    },
-    HasType {
+    HasTypes {
        value: TableAlias,
-        value_type: ValueType,
+        value_types: ValueTypeSet,
        strict: bool,
    },
    NotExists(ComputedTable),
@ -345,7 +345,11 @@ pub enum ColumnConstraint {

 impl ColumnConstraint {
    pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint {
-        ColumnConstraint::HasType { value, value_type, strict: false }
+        ColumnConstraint::HasTypes {
+            value,
+            value_types: ValueTypeSet::of_one(value_type),
+            strict: false
+        }
    }
 }

@ -461,14 +465,20 @@ impl Debug for ColumnConstraint {
                write!(f, "{:?} MATCHES {:?}", qa, thing)
            },

-            &HasType { ref value, value_type, strict } => {
-                write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
-                if strict && value_type == ValueType::Double || value_type == ValueType::Long {
-                    write!(f, " AND typeof({:?}) = '{:?}')", value,
-                           if value_type == ValueType::Double { "real" } else { "integer" })
-                } else {
-                    write!(f, ")")
+            &HasTypes { ref value, ref value_types, strict } => {
+                // This is cludgey, but it's debug code.
+                write!(f, "(")?;
+                for value_type in value_types.iter() {
+                    write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
+                    if strict && value_type == ValueType::Double || value_type == ValueType::Long {
+                        write!(f, " AND typeof({:?}) = '{:?}')", value,
+                               if value_type == ValueType::Double { "real" } else { "integer" })?;
+                    } else {
+                        write!(f, ")")?;
+                    }
+                    write!(f, " OR ")?;
                }
+                write!(f, "1)")
            },
            &NotExists(ref ct) => {
                write!(f, "NOT EXISTS {:?}", ct)
--- a/query-algebrizer/tests/fulltext.rs
+++ b/query-algebrizer/tests/fulltext.rs
@ -13,7 +13,7 @@ extern crate mentat_query;
 extern crate mentat_query_algebrizer;
 extern crate mentat_query_parser;

-pub mod utils;
+mod utils;

 use mentat_core::{
    Attribute,
--- a/query-algebrizer/tests/ground.rs
+++ b/query-algebrizer/tests/ground.rs
@ -13,7 +13,7 @@ extern crate mentat_query;
 extern crate mentat_query_algebrizer;
 extern crate mentat_query_parser;

-pub mod utils;
+mod utils;

 use std::collections::BTreeMap;

--- a/query-algebrizer/tests/predicate.rs
+++ b/query-algebrizer/tests/predicate.rs
@ -13,7 +13,7 @@ extern crate mentat_query;
 extern crate mentat_query_algebrizer;
 extern crate mentat_query_parser;

-pub mod utils;
+mod utils;

 use mentat_core::{
    Attribute,
--- a/query-algebrizer/tests/type_reqs.rs
+++ b/query-algebrizer/tests/type_reqs.rs
@ -13,7 +13,7 @@ extern crate mentat_query;
 extern crate mentat_query_algebrizer;
 extern crate mentat_query_parser;

-pub mod utils;
+mod utils;

 use utils::{
    alg,
--- a/query-algebrizer/tests/utils/mod.rs
+++ b/query-algebrizer/tests/utils/mod.rs
@ -8,6 +8,11 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

+// This is required to prevent warnings about unused functions in this file just
+// because it's unused in a single file (tests that don't use every function in
+// this module will get warnings otherwise).
+#![allow(dead_code)]
+
 use mentat_core::{
    Attribute,
    Entid,
@ -31,10 +36,7 @@ use mentat_query_algebrizer::{
    QueryInputs,
 };

-// Common utility functions used in multiple test files. Note: Import this with
-// `pub mod utils` (not `mod utils`), or you'll get spurious unused function
-// warnings when functions exist in this file but are only used by modules that
-// don't import with `pub` (yes, this is annoying).
+// Common utility functions used in multiple test files.

 // These are helpers that tests use to build Schema instances.
 pub fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) {
@ -95,4 +97,3 @@ pub fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
    let parsed = parse_find_string(input).expect("query input to have parsed");
    algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc
 }
-
--- a/query-translator/src/translate.rs
+++ b/query-translator/src/translate.rs
@ -12,6 +12,7 @@ use mentat_core::{
    SQLValueType,
    TypedValue,
    ValueType,
+    ValueTypeSet,
 };

 use mentat_query::Limit;
@ -158,30 +159,53 @@ impl ToConstraint for ColumnConstraint {
                    right: right.into(),
                }
            },
-            HasType { value: table, value_type, strict } => {
-                let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
-                let loose = Constraint::equal(type_column,
-                                              ColumnOrExpression::Integer(value_type.value_type_tag()));
-                if !strict || (value_type != ValueType::Long && value_type != ValueType::Double) {
-                    loose
+            HasTypes { value: table, value_types, strict: strict_requested } => {
+                // If strict mode checking is on, and need to check exactly 1
+                // (not both or neither) of ValueType::Double and ValueType::Long
+                // we emit a Constraint::TypeCheck.
+                let num_numeric_checks = (value_types.contains(ValueType::Double) as i32) +
+                                         (value_types.contains(ValueType::Long) as i32);
+                let strict = strict_requested && num_numeric_checks == 1;
+
+                let types = if !strict && num_numeric_checks == 2 {
+                    // If we aren't operating in strict mode (either because it
+                    // wasn't requested, or because it doesn't make a difference),
+                    // and both ValueType::Double and ValueType::Long are being
+                    // checked, we remove the test for one of them, as they're
+                    // represented using the same value type tag (we choose to
+                    // remove the check for ValueType::Double, but it's an
+                    // arbitrary choice)
+                    value_types.difference(&ValueTypeSet::of_one(ValueType::Double))
                } else {
-                    // HasType has requested that we check for strict equality, and we're
-                    // checking a ValueType where that makes a difference (a numeric type).
-                    let val_column = QualifiedAlias::new(table, DatomsColumn::Value).to_column();
-                    Constraint::And {
-                        constraints: vec![
-                            loose,
-                            Constraint::TypeCheck {
-                                value: val_column,
-                                datatype: match value_type {
-                                    ValueType::Long => SQLDatatype::Integer,
-                                    ValueType::Double => SQLDatatype::Real,
-                                    _ => unreachable!()
+                    value_types
+                };
+                let constraints = types.into_iter().map(|ty| {
+                    let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
+                    let loose = Constraint::equal(type_column, ColumnOrExpression::Integer(ty.value_type_tag()));
+                    if !strict || (ty != ValueType::Long && ty != ValueType::Double) {
+                        loose
+                    } else {
+                        let val_column = QualifiedAlias::new(table.clone(), DatomsColumn::Value).to_column();
+                        // We're handling strict equality for a numeric type, so we need to emit
+                        // a `typeof(col) = '(real|integer)'` too. This should happen a maximum of
+                        // once per `HasTypes`
+                        Constraint::And {
+                            constraints: vec![
+                                loose,
+                                Constraint::TypeCheck {
+                                    value: val_column,
+                                    datatype: match ty {
+                                        ValueType::Long => SQLDatatype::Integer,
+                                        ValueType::Double => SQLDatatype::Real,
+                                        _ => unreachable!()
+                                    }
                                }
-                            }
-                        ]
+                            ]
+                        }
                    }
-                }
+                }).collect::<Vec<_>>();
+
+                Constraint::Or { constraints }
            },

            NotExists(computed_table) => {
--- a/query-translator/tests/translate.rs
+++ b/query-translator/tests/translate.rs
@ -209,7 +209,7 @@ fn test_unknown_attribute_keyword_value() {
    let SQLQuery { sql, args } = translate(&schema, query);

    // Only match keywords, not strings: tag = 13.
-    assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13");
+    assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND (`datoms00`.value_type_tag = 13)");
    assert_eq!(args, vec![make_arg("$v0", ":ab/yyy")]);
 }

@ -222,7 +222,7 @@ fn test_unknown_attribute_string_value() {

    // We expect all_datoms because we're querying for a string. Magic, that.
    // We don't want keywords etc., so tag = 10.
-    assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10");
+    assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND (`all_datoms00`.value_type_tag = 10)");
    assert_eq!(args, vec![make_arg("$v0", "horses")]);
 }

@ -235,7 +235,7 @@ fn test_unknown_attribute_double_value() {

    // In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to
    // make sure we only match numbers.
-    assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND `datoms00`.value_type_tag = 5");
+    assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND (`datoms00`.value_type_tag = 5)");
    assert_eq!(args, vec![]);
 }

@ -286,6 +286,50 @@ fn test_unknown_ident() {
    assert_eq!("SELECT 1 LIMIT 0", sql);
 }

+#[test]
+fn test_type_required_long() {
+    let schema = Schema::default();
+
+    let query = r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#;
+    let SQLQuery { sql, args } = translate(&schema, query);
+
+    assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
+                     FROM `all_datoms` AS `all_datoms00` \
+                     WHERE ((`all_datoms00`.value_type_tag = 5 AND \
+                             typeof(`all_datoms00`.v) = 'integer'))");
+
+    assert_eq!(args, vec![]);
+}
+
+#[test]
+fn test_type_required_double() {
+    let schema = Schema::default();
+
+    let query = r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#;
+    let SQLQuery { sql, args } = translate(&schema, query);
+
+    assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
+                     FROM `all_datoms` AS `all_datoms00` \
+                     WHERE ((`all_datoms00`.value_type_tag = 5 AND \
+                             typeof(`all_datoms00`.v) = 'real'))");
+
+    assert_eq!(args, vec![]);
+}
+
+#[test]
+fn test_type_required_boolean() {
+    let schema = Schema::default();
+
+    let query = r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#;
+    let SQLQuery { sql, args } = translate(&schema, query);
+
+    assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
+                     FROM `all_datoms` AS `all_datoms00` \
+                     WHERE (`all_datoms00`.value_type_tag = 1)");
+
+    assert_eq!(args, vec![]);
+}
+
 #[test]
 fn test_numeric_less_than_unknown_attribute() {
    let schema = Schema::default();
@ -751,7 +795,7 @@ fn test_unbound_attribute_with_ground() {
                                     `all_datoms00`.value_type_tag AS `?v_value_type_tag` \
                     FROM `all_datoms` AS `all_datoms00` \
                     WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \
-                                                      `all_datoms00`.value_type_tag = 5)");
+                                                     (`all_datoms00`.value_type_tag = 5))");
 }