diff --git a/core/src/lib.rs b/core/src/lib.rs index 316eab17..f66eac5a 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -412,6 +412,12 @@ impl ValueTypeSet { ValueTypeSet(self.0.intersection(other.0)) } + /// Returns the set difference between `self` and `other`, which is the + /// set of items in `self` that are not in `other`. + pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet { + ValueTypeSet(self.0 - other.0) + } + /// Return an arbitrary type that's part of this set. /// For a set containing a single type, this will be that type. pub fn exemplar(&self) -> Option { @@ -422,6 +428,11 @@ impl ValueTypeSet { self.0.is_subset(&other.0) } + /// Returns true if `self` and `other` contain no items in common. + pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool { + self.0.is_disjoint(&other.0) + } + pub fn contains(&self, vt: ValueType) -> bool { self.0.contains(&vt) } @@ -433,6 +444,10 @@ impl ValueTypeSet { pub fn is_unit(&self) -> bool { self.0.len() == 1 } + + pub fn iter(&self) -> ::enum_set::Iter { + self.0.iter() + } } impl IntoIterator for ValueTypeSet { diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 52ab4b38..e587fdeb 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -218,7 +218,7 @@ pub struct ConjoiningClauses { pub extracted_types: BTreeMap, /// Map of variables to the set of type requirements we have for them. - required_types: BTreeMap, + required_types: BTreeMap, } impl PartialEq for ConjoiningClauses { @@ -551,17 +551,27 @@ impl ConjoiningClauses { } } - pub fn add_type_requirement(&mut self, var: Variable, ty: ValueType) { - if let Some(existing) = self.required_types.insert(var.clone(), ty) { - // If we already have a required type for `var`, we're empty. - if existing != ty { - self.mark_known_empty(EmptyBecause::TypeMismatch { - var: var.clone(), - existing: ValueTypeSet::of_one(existing), - desired: ValueTypeSet::of_one(ty) - }); - } + /// Require that `var` be one of the types in `types`. If any existing + /// type requirements exist for `var`, the requirement after this + /// function returns will be the intersection of the requested types and + /// the type requirements in place prior to calling `add_type_requirement`. + /// + /// If the intersection will leave the variable so that it cannot be any + /// type, we'll call mark_known_empty. + pub fn add_type_requirement(&mut self, var: Variable, types: ValueTypeSet) { + let existing = self.required_types.get(&var).cloned().unwrap_or(ValueTypeSet::any()); + + // We have an existing requirement. The new requirement will be + // the intersection, but we'll mark_known_empty if that's empty. + let intersection = types.intersection(&existing); + if intersection.is_empty() { + self.mark_known_empty(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: existing, + desired: types, + }); } + self.required_types.insert(var, intersection); } /// Like `constrain_var_to_type` but in reverse: this expands the set of types @@ -872,34 +882,56 @@ impl ConjoiningClauses { } pub fn process_required_types(&mut self) -> Result<()> { + if self.empty_because.is_some() { + return Ok(()) + } // We can't call `mark_known_empty` inside the loop since it would be a // mutable borrow on self while we're iterating over `self.required_types`. // Doing it like this avoids needing to copy `self.required_types`. let mut empty_because: Option = None; - for (var, &ty) in self.required_types.iter() { - if let Some(&already_known) = self.known_types.get(var) { - if already_known.exemplar() == Some(ty) { - // If we're already certain the type and the constraint are - // the same, then there's no need to constrain anything. - continue; - } - if !already_known.contains(ty) && empty_because.is_none() { + for (var, types) in self.required_types.iter() { + if let Some(already_known) = self.known_types.get(var) { + if already_known.is_disjoint(types) { // If we know the constraint can't be one of the types // the variable could take, then we know we're empty. empty_because = Some(EmptyBecause::TypeMismatch { var: var.clone(), - existing: already_known, - desired: ValueTypeSet::of_one(ty) + existing: already_known.clone(), + desired: types.clone(), }); break; } + if already_known.is_subset(types) { + // TODO: I'm not convinced that we can do nothing here. + // + // Consider `[:find ?x ?v :where [_ _ ?v] [(> ?v 10)] [?x :foo/long ?v]]`. + // + // That will produce SQL like: + // + // ``` + // SELECT datoms01.e AS `?x`, datoms00.v AS `?v` + // FROM datoms datoms00, datoms01 + // WHERE datoms00.v > 10 + // AND datoms01.v = datoms00.v + // AND datoms01.value_type_tag = datoms00.value_type_tag + // AND datoms01.a = 65537 + // ``` + // + // Which is not optimal — the left side of the join will + // produce lots of spurious bindings for datoms00.v. + // + // See https://github.com/mozilla/mentat/issues/520, and + // https://github.com/mozilla/mentat/issues/293. + continue; + } } + let qa = self.extracted_types - .get(&var) - .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?; - self.wheres.add_intersection(ColumnConstraint::HasType { + .get(&var) + .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?; + self.wheres.add_intersection(ColumnConstraint::HasTypes { value: qa.0.clone(), - value_type: ty, + value_types: *types, strict: true, }); } diff --git a/query-algebrizer/src/clauses/not.rs b/query-algebrizer/src/clauses/not.rs index c8e1e9ea..cd7ff46f 100644 --- a/query-algebrizer/src/clauses/not.rs +++ b/query-algebrizer/src/clauses/not.rs @@ -53,10 +53,21 @@ impl ConjoiningClauses { template.apply_clause(&schema, clause)?; } - template.expand_column_bindings(); - template.prune_extracted_types(); - template.process_required_types()?; + if template.is_known_empty() { + return Ok(()); + } + template.expand_column_bindings(); + if template.is_known_empty() { + return Ok(()); + } + + template.prune_extracted_types(); + if template.is_known_empty() { + return Ok(()); + } + + template.process_required_types()?; if template.is_known_empty() { return Ok(()); } diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index e1e66635..427774e1 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -53,7 +53,7 @@ impl ConjoiningClauses { /// There are several kinds of predicates in our Datalog: /// - A limited set of binary comparison operators: < > <= >= !=. /// These are converted into SQLite binary comparisons and some type constraints. - /// - A set of type requirements constraining their argument to be a specific ValueType + /// - A set of type requirements constraining their argument to be a specific ValueType. /// - In the future, some predicates that are implemented via function calls in SQLite. /// /// At present we have implemented only the five built-in comparison binary operators. @@ -83,7 +83,7 @@ impl ConjoiningClauses { let mut args = pred.args.into_iter(); if let FnArg::Variable(v) = args.next().unwrap() { - self.add_type_requirement(v, ty); + self.add_type_requirement(v, ValueTypeSet::of_one(ty)); Ok(()) } else { bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0)) diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 1fddc7f5..d692fc3e 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -334,9 +334,9 @@ pub enum ColumnConstraint { left: QueryValue, right: QueryValue, }, - HasType { + HasTypes { value: TableAlias, - value_type: ValueType, + value_types: ValueTypeSet, strict: bool, }, NotExists(ComputedTable), @@ -345,7 +345,11 @@ pub enum ColumnConstraint { impl ColumnConstraint { pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint { - ColumnConstraint::HasType { value, value_type, strict: false } + ColumnConstraint::HasTypes { + value, + value_types: ValueTypeSet::of_one(value_type), + strict: false + } } } @@ -461,14 +465,20 @@ impl Debug for ColumnConstraint { write!(f, "{:?} MATCHES {:?}", qa, thing) }, - &HasType { ref value, value_type, strict } => { - write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?; - if strict && value_type == ValueType::Double || value_type == ValueType::Long { - write!(f, " AND typeof({:?}) = '{:?}')", value, - if value_type == ValueType::Double { "real" } else { "integer" }) - } else { - write!(f, ")") + &HasTypes { ref value, ref value_types, strict } => { + // This is cludgey, but it's debug code. + write!(f, "(")?; + for value_type in value_types.iter() { + write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?; + if strict && value_type == ValueType::Double || value_type == ValueType::Long { + write!(f, " AND typeof({:?}) = '{:?}')", value, + if value_type == ValueType::Double { "real" } else { "integer" })?; + } else { + write!(f, ")")?; + } + write!(f, " OR ")?; } + write!(f, "1)") }, &NotExists(ref ct) => { write!(f, "NOT EXISTS {:?}", ct) diff --git a/query-algebrizer/tests/fulltext.rs b/query-algebrizer/tests/fulltext.rs index 3195a10c..8b2cb198 100644 --- a/query-algebrizer/tests/fulltext.rs +++ b/query-algebrizer/tests/fulltext.rs @@ -13,7 +13,7 @@ extern crate mentat_query; extern crate mentat_query_algebrizer; extern crate mentat_query_parser; -pub mod utils; +mod utils; use mentat_core::{ Attribute, diff --git a/query-algebrizer/tests/ground.rs b/query-algebrizer/tests/ground.rs index a799ecb8..de9eed1f 100644 --- a/query-algebrizer/tests/ground.rs +++ b/query-algebrizer/tests/ground.rs @@ -13,7 +13,7 @@ extern crate mentat_query; extern crate mentat_query_algebrizer; extern crate mentat_query_parser; -pub mod utils; +mod utils; use std::collections::BTreeMap; diff --git a/query-algebrizer/tests/predicate.rs b/query-algebrizer/tests/predicate.rs index d8178400..8ad2f0e3 100644 --- a/query-algebrizer/tests/predicate.rs +++ b/query-algebrizer/tests/predicate.rs @@ -13,7 +13,7 @@ extern crate mentat_query; extern crate mentat_query_algebrizer; extern crate mentat_query_parser; -pub mod utils; +mod utils; use mentat_core::{ Attribute, diff --git a/query-algebrizer/tests/type_reqs.rs b/query-algebrizer/tests/type_reqs.rs index 39e8772a..43c1ee0a 100644 --- a/query-algebrizer/tests/type_reqs.rs +++ b/query-algebrizer/tests/type_reqs.rs @@ -13,7 +13,7 @@ extern crate mentat_query; extern crate mentat_query_algebrizer; extern crate mentat_query_parser; -pub mod utils; +mod utils; use utils::{ alg, diff --git a/query-algebrizer/tests/utils/mod.rs b/query-algebrizer/tests/utils/mod.rs index 4a5aea49..2fd9872d 100644 --- a/query-algebrizer/tests/utils/mod.rs +++ b/query-algebrizer/tests/utils/mod.rs @@ -8,6 +8,11 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// This is required to prevent warnings about unused functions in this file just +// because it's unused in a single file (tests that don't use every function in +// this module will get warnings otherwise). +#![allow(dead_code)] + use mentat_core::{ Attribute, Entid, @@ -31,10 +36,7 @@ use mentat_query_algebrizer::{ QueryInputs, }; -// Common utility functions used in multiple test files. Note: Import this with -// `pub mod utils` (not `mod utils`), or you'll get spurious unused function -// warnings when functions exist in this file but are only used by modules that -// don't import with `pub` (yes, this is annoying). +// Common utility functions used in multiple test files. // These are helpers that tests use to build Schema instances. pub fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { @@ -95,4 +97,3 @@ pub fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { let parsed = parse_find_string(input).expect("query input to have parsed"); algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc } - diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index cda43888..aceb9dd3 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -12,6 +12,7 @@ use mentat_core::{ SQLValueType, TypedValue, ValueType, + ValueTypeSet, }; use mentat_query::Limit; @@ -158,30 +159,53 @@ impl ToConstraint for ColumnConstraint { right: right.into(), } }, - HasType { value: table, value_type, strict } => { - let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column(); - let loose = Constraint::equal(type_column, - ColumnOrExpression::Integer(value_type.value_type_tag())); - if !strict || (value_type != ValueType::Long && value_type != ValueType::Double) { - loose + HasTypes { value: table, value_types, strict: strict_requested } => { + // If strict mode checking is on, and need to check exactly 1 + // (not both or neither) of ValueType::Double and ValueType::Long + // we emit a Constraint::TypeCheck. + let num_numeric_checks = (value_types.contains(ValueType::Double) as i32) + + (value_types.contains(ValueType::Long) as i32); + let strict = strict_requested && num_numeric_checks == 1; + + let types = if !strict && num_numeric_checks == 2 { + // If we aren't operating in strict mode (either because it + // wasn't requested, or because it doesn't make a difference), + // and both ValueType::Double and ValueType::Long are being + // checked, we remove the test for one of them, as they're + // represented using the same value type tag (we choose to + // remove the check for ValueType::Double, but it's an + // arbitrary choice) + value_types.difference(&ValueTypeSet::of_one(ValueType::Double)) } else { - // HasType has requested that we check for strict equality, and we're - // checking a ValueType where that makes a difference (a numeric type). - let val_column = QualifiedAlias::new(table, DatomsColumn::Value).to_column(); - Constraint::And { - constraints: vec![ - loose, - Constraint::TypeCheck { - value: val_column, - datatype: match value_type { - ValueType::Long => SQLDatatype::Integer, - ValueType::Double => SQLDatatype::Real, - _ => unreachable!() + value_types + }; + let constraints = types.into_iter().map(|ty| { + let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column(); + let loose = Constraint::equal(type_column, ColumnOrExpression::Integer(ty.value_type_tag())); + if !strict || (ty != ValueType::Long && ty != ValueType::Double) { + loose + } else { + let val_column = QualifiedAlias::new(table.clone(), DatomsColumn::Value).to_column(); + // We're handling strict equality for a numeric type, so we need to emit + // a `typeof(col) = '(real|integer)'` too. This should happen a maximum of + // once per `HasTypes` + Constraint::And { + constraints: vec![ + loose, + Constraint::TypeCheck { + value: val_column, + datatype: match ty { + ValueType::Long => SQLDatatype::Integer, + ValueType::Double => SQLDatatype::Real, + _ => unreachable!() + } } - } - ] + ] + } } - } + }).collect::>(); + + Constraint::Or { constraints } }, NotExists(computed_table) => { diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 267aed73..5185c1df 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -209,7 +209,7 @@ fn test_unknown_attribute_keyword_value() { let SQLQuery { sql, args } = translate(&schema, query); // Only match keywords, not strings: tag = 13. - assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND (`datoms00`.value_type_tag = 13)"); assert_eq!(args, vec![make_arg("$v0", ":ab/yyy")]); } @@ -222,7 +222,7 @@ fn test_unknown_attribute_string_value() { // We expect all_datoms because we're querying for a string. Magic, that. // We don't want keywords etc., so tag = 10. - assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10"); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND (`all_datoms00`.value_type_tag = 10)"); assert_eq!(args, vec![make_arg("$v0", "horses")]); } @@ -235,7 +235,7 @@ fn test_unknown_attribute_double_value() { // In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to // make sure we only match numbers. - assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND `datoms00`.value_type_tag = 5"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND (`datoms00`.value_type_tag = 5)"); assert_eq!(args, vec![]); } @@ -286,6 +286,50 @@ fn test_unknown_ident() { assert_eq!("SELECT 1 LIMIT 0", sql); } +#[test] +fn test_type_required_long() { + let schema = Schema::default(); + + let query = r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ + FROM `all_datoms` AS `all_datoms00` \ + WHERE ((`all_datoms00`.value_type_tag = 5 AND \ + typeof(`all_datoms00`.v) = 'integer'))"); + + assert_eq!(args, vec![]); +} + +#[test] +fn test_type_required_double() { + let schema = Schema::default(); + + let query = r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ + FROM `all_datoms` AS `all_datoms00` \ + WHERE ((`all_datoms00`.value_type_tag = 5 AND \ + typeof(`all_datoms00`.v) = 'real'))"); + + assert_eq!(args, vec![]); +} + +#[test] +fn test_type_required_boolean() { + let schema = Schema::default(); + + let query = r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ + FROM `all_datoms` AS `all_datoms00` \ + WHERE (`all_datoms00`.value_type_tag = 1)"); + + assert_eq!(args, vec![]); +} + #[test] fn test_numeric_less_than_unknown_attribute() { let schema = Schema::default(); @@ -751,7 +795,7 @@ fn test_unbound_attribute_with_ground() { `all_datoms00`.value_type_tag AS `?v_value_type_tag` \ FROM `all_datoms` AS `all_datoms00` \ WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \ - `all_datoms00`.value_type_tag = 5)"); + (`all_datoms00`.value_type_tag = 5))"); }