Allow required_types to constrain a variable to be one of several types.

This commit is contained in:
Thom Chiovoloni 2018-01-18 20:50:20 -05:00
parent f3dc922571
commit c9e2ff8146
12 changed files with 211 additions and 74 deletions

View file

@ -412,6 +412,12 @@ impl ValueTypeSet {
ValueTypeSet(self.0.intersection(other.0)) ValueTypeSet(self.0.intersection(other.0))
} }
/// Computes the set difference of `self` and `other`: every type that is a
/// member of `self` but is not a member of `other`.
pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet {
    let remaining = self.0 - other.0;
    ValueTypeSet(remaining)
}
/// Return an arbitrary type that's part of this set. /// Return an arbitrary type that's part of this set.
/// For a set containing a single type, this will be that type. /// For a set containing a single type, this will be that type.
pub fn exemplar(&self) -> Option<ValueType> { pub fn exemplar(&self) -> Option<ValueType> {
@ -422,6 +428,11 @@ impl ValueTypeSet {
self.0.is_subset(&other.0) self.0.is_subset(&other.0)
} }
/// Returns true when no type is a member of both `self` and `other`.
pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool {
    let theirs = &other.0;
    self.0.is_disjoint(theirs)
}
pub fn contains(&self, vt: ValueType) -> bool { pub fn contains(&self, vt: ValueType) -> bool {
self.0.contains(&vt) self.0.contains(&vt)
} }
@ -433,6 +444,10 @@ impl ValueTypeSet {
pub fn is_unit(&self) -> bool { pub fn is_unit(&self) -> bool {
self.0.len() == 1 self.0.len() == 1
} }
pub fn iter(&self) -> ::enum_set::Iter<ValueType> {
self.0.iter()
}
} }
impl IntoIterator for ValueTypeSet { impl IntoIterator for ValueTypeSet {

View file

@ -218,7 +218,7 @@ pub struct ConjoiningClauses {
pub extracted_types: BTreeMap<Variable, QualifiedAlias>, pub extracted_types: BTreeMap<Variable, QualifiedAlias>,
/// Map of variables to the set of type requirements we have for them. /// Map of variables to the set of type requirements we have for them.
required_types: BTreeMap<Variable, ValueType>, required_types: BTreeMap<Variable, ValueTypeSet>,
} }
impl PartialEq for ConjoiningClauses { impl PartialEq for ConjoiningClauses {
@ -551,17 +551,27 @@ impl ConjoiningClauses {
} }
} }
pub fn add_type_requirement(&mut self, var: Variable, ty: ValueType) { /// Require that `var` be one of the types in `types`. If any existing
if let Some(existing) = self.required_types.insert(var.clone(), ty) { /// type requirements exist for `var`, the requirement after this
// If we already have a required type for `var`, we're empty. /// function returns will be the intersection of the requested types and
if existing != ty { /// the type requirements in place prior to calling `add_type_requirement`.
self.mark_known_empty(EmptyBecause::TypeMismatch { ///
var: var.clone(), /// If the intersection will leave the variable so that it cannot be any
existing: ValueTypeSet::of_one(existing), /// type, we'll call mark_known_empty.
desired: ValueTypeSet::of_one(ty) pub fn add_type_requirement(&mut self, var: Variable, types: ValueTypeSet) {
}); let existing = self.required_types.get(&var).cloned().unwrap_or(ValueTypeSet::any());
}
// We have an existing requirement. The new requirement will be
// the intersection, but we'll mark_known_empty if that's empty.
let intersection = types.intersection(&existing);
if intersection.is_empty() {
self.mark_known_empty(EmptyBecause::TypeMismatch {
var: var.clone(),
existing: existing,
desired: types,
});
} }
self.required_types.insert(var, intersection);
} }
/// Like `constrain_var_to_type` but in reverse: this expands the set of types /// Like `constrain_var_to_type` but in reverse: this expands the set of types
@ -872,34 +882,56 @@ impl ConjoiningClauses {
} }
pub fn process_required_types(&mut self) -> Result<()> { pub fn process_required_types(&mut self) -> Result<()> {
if self.empty_because.is_some() {
return Ok(())
}
// We can't call `mark_known_empty` inside the loop since it would be a // We can't call `mark_known_empty` inside the loop since it would be a
// mutable borrow on self while we're iterating over `self.required_types`. // mutable borrow on self while we're iterating over `self.required_types`.
// Doing it like this avoids needing to copy `self.required_types`. // Doing it like this avoids needing to copy `self.required_types`.
let mut empty_because: Option<EmptyBecause> = None; let mut empty_because: Option<EmptyBecause> = None;
for (var, &ty) in self.required_types.iter() { for (var, types) in self.required_types.iter() {
if let Some(&already_known) = self.known_types.get(var) { if let Some(already_known) = self.known_types.get(var) {
if already_known.exemplar() == Some(ty) { if already_known.is_disjoint(types) {
// If we're already certain the type and the constraint are
// the same, then there's no need to constrain anything.
continue;
}
if !already_known.contains(ty) && empty_because.is_none() {
// If we know the constraint can't be one of the types // If we know the constraint can't be one of the types
// the variable could take, then we know we're empty. // the variable could take, then we know we're empty.
empty_because = Some(EmptyBecause::TypeMismatch { empty_because = Some(EmptyBecause::TypeMismatch {
var: var.clone(), var: var.clone(),
existing: already_known, existing: already_known.clone(),
desired: ValueTypeSet::of_one(ty) desired: types.clone(),
}); });
break; break;
} }
if already_known.is_subset(types) {
// TODO: I'm not convinced that we can do nothing here.
//
// Consider `[:find ?x ?v :where [_ _ ?v] [(> ?v 10)] [?x :foo/long ?v]]`.
//
// That will produce SQL like:
//
// ```
// SELECT datoms01.e AS `?x`, datoms00.v AS `?v`
// FROM datoms datoms00, datoms01
// WHERE datoms00.v > 10
// AND datoms01.v = datoms00.v
// AND datoms01.value_type_tag = datoms00.value_type_tag
// AND datoms01.a = 65537
// ```
//
// Which is not optimal — the left side of the join will
// produce lots of spurious bindings for datoms00.v.
//
// See https://github.com/mozilla/mentat/issues/520, and
// https://github.com/mozilla/mentat/issues/293.
continue;
}
} }
let qa = self.extracted_types let qa = self.extracted_types
.get(&var) .get(&var)
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?; .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
self.wheres.add_intersection(ColumnConstraint::HasType { self.wheres.add_intersection(ColumnConstraint::HasTypes {
value: qa.0.clone(), value: qa.0.clone(),
value_type: ty, value_types: *types,
strict: true, strict: true,
}); });
} }

View file

@ -53,10 +53,21 @@ impl ConjoiningClauses {
template.apply_clause(&schema, clause)?; template.apply_clause(&schema, clause)?;
} }
template.expand_column_bindings(); if template.is_known_empty() {
template.prune_extracted_types(); return Ok(());
template.process_required_types()?; }
template.expand_column_bindings();
if template.is_known_empty() {
return Ok(());
}
template.prune_extracted_types();
if template.is_known_empty() {
return Ok(());
}
template.process_required_types()?;
if template.is_known_empty() { if template.is_known_empty() {
return Ok(()); return Ok(());
} }

View file

@ -53,7 +53,7 @@ impl ConjoiningClauses {
/// There are several kinds of predicates in our Datalog: /// There are several kinds of predicates in our Datalog:
/// - A limited set of binary comparison operators: < > <= >= !=. /// - A limited set of binary comparison operators: < > <= >= !=.
/// These are converted into SQLite binary comparisons and some type constraints. /// These are converted into SQLite binary comparisons and some type constraints.
/// - A set of type requirements constraining their argument to be a specific ValueType /// - A set of type requirements constraining their argument to be a specific ValueType.
/// - In the future, some predicates that are implemented via function calls in SQLite. /// - In the future, some predicates that are implemented via function calls in SQLite.
/// ///
/// At present we have implemented only the five built-in comparison binary operators. /// At present we have implemented only the five built-in comparison binary operators.
@ -83,7 +83,7 @@ impl ConjoiningClauses {
let mut args = pred.args.into_iter(); let mut args = pred.args.into_iter();
if let FnArg::Variable(v) = args.next().unwrap() { if let FnArg::Variable(v) = args.next().unwrap() {
self.add_type_requirement(v, ty); self.add_type_requirement(v, ValueTypeSet::of_one(ty));
Ok(()) Ok(())
} else { } else {
bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0)) bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0))

View file

@ -334,9 +334,9 @@ pub enum ColumnConstraint {
left: QueryValue, left: QueryValue,
right: QueryValue, right: QueryValue,
}, },
HasType { HasTypes {
value: TableAlias, value: TableAlias,
value_type: ValueType, value_types: ValueTypeSet,
strict: bool, strict: bool,
}, },
NotExists(ComputedTable), NotExists(ComputedTable),
@ -345,7 +345,11 @@ pub enum ColumnConstraint {
impl ColumnConstraint { impl ColumnConstraint {
pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint { pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint {
ColumnConstraint::HasType { value, value_type, strict: false } ColumnConstraint::HasTypes {
value,
value_types: ValueTypeSet::of_one(value_type),
strict: false
}
} }
} }
@ -461,14 +465,20 @@ impl Debug for ColumnConstraint {
write!(f, "{:?} MATCHES {:?}", qa, thing) write!(f, "{:?} MATCHES {:?}", qa, thing)
}, },
&HasType { ref value, value_type, strict } => { &HasTypes { ref value, ref value_types, strict } => {
write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?; // This is cludgey, but it's debug code.
if strict && value_type == ValueType::Double || value_type == ValueType::Long { write!(f, "(")?;
write!(f, " AND typeof({:?}) = '{:?}')", value, for value_type in value_types.iter() {
if value_type == ValueType::Double { "real" } else { "integer" }) write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
} else { if strict && value_type == ValueType::Double || value_type == ValueType::Long {
write!(f, ")") write!(f, " AND typeof({:?}) = '{:?}')", value,
if value_type == ValueType::Double { "real" } else { "integer" })?;
} else {
write!(f, ")")?;
}
write!(f, " OR ")?;
} }
write!(f, "1)")
}, },
&NotExists(ref ct) => { &NotExists(ref ct) => {
write!(f, "NOT EXISTS {:?}", ct) write!(f, "NOT EXISTS {:?}", ct)

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer; extern crate mentat_query_algebrizer;
extern crate mentat_query_parser; extern crate mentat_query_parser;
pub mod utils; mod utils;
use mentat_core::{ use mentat_core::{
Attribute, Attribute,

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer; extern crate mentat_query_algebrizer;
extern crate mentat_query_parser; extern crate mentat_query_parser;
pub mod utils; mod utils;
use std::collections::BTreeMap; use std::collections::BTreeMap;

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer; extern crate mentat_query_algebrizer;
extern crate mentat_query_parser; extern crate mentat_query_parser;
pub mod utils; mod utils;
use mentat_core::{ use mentat_core::{
Attribute, Attribute,

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer; extern crate mentat_query_algebrizer;
extern crate mentat_query_parser; extern crate mentat_query_parser;
pub mod utils; mod utils;
use utils::{ use utils::{
alg, alg,

View file

@ -8,6 +8,11 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the // CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License. // specific language governing permissions and limitations under the License.
// This is required to prevent warnings about unused functions in this file:
// a test file that imports this module but doesn't call every function in it
// would otherwise get a dead-code warning for each function it leaves unused.
#![allow(dead_code)]
use mentat_core::{ use mentat_core::{
Attribute, Attribute,
Entid, Entid,
@ -31,10 +36,7 @@ use mentat_query_algebrizer::{
QueryInputs, QueryInputs,
}; };
// Common utility functions used in multiple test files. Note: Import this with // Common utility functions used in multiple test files.
// `pub mod utils` (not `mod utils`), or you'll get spurious unused function
// warnings when functions exist in this file but are only used by modules that
// don't import with `pub` (yes, this is annoying).
// These are helpers that tests use to build Schema instances. // These are helpers that tests use to build Schema instances.
pub fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { pub fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) {
@ -95,4 +97,3 @@ pub fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
let parsed = parse_find_string(input).expect("query input to have parsed"); let parsed = parse_find_string(input).expect("query input to have parsed");
algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc
} }

View file

@ -12,6 +12,7 @@ use mentat_core::{
SQLValueType, SQLValueType,
TypedValue, TypedValue,
ValueType, ValueType,
ValueTypeSet,
}; };
use mentat_query::Limit; use mentat_query::Limit;
@ -158,30 +159,53 @@ impl ToConstraint for ColumnConstraint {
right: right.into(), right: right.into(),
} }
}, },
HasType { value: table, value_type, strict } => { HasTypes { value: table, value_types, strict: strict_requested } => {
let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column(); // If strict mode checking is on, and need to check exactly 1
let loose = Constraint::equal(type_column, // (not both or neither) of ValueType::Double and ValueType::Long
ColumnOrExpression::Integer(value_type.value_type_tag())); // we emit a Constraint::TypeCheck.
if !strict || (value_type != ValueType::Long && value_type != ValueType::Double) { let num_numeric_checks = (value_types.contains(ValueType::Double) as i32) +
loose (value_types.contains(ValueType::Long) as i32);
let strict = strict_requested && num_numeric_checks == 1;
let types = if !strict && num_numeric_checks == 2 {
// If we aren't operating in strict mode (either because it
// wasn't requested, or because it doesn't make a difference),
// and both ValueType::Double and ValueType::Long are being
// checked, we remove the test for one of them, as they're
// represented using the same value type tag (we choose to
// remove the check for ValueType::Double, but it's an
// arbitrary choice)
value_types.difference(&ValueTypeSet::of_one(ValueType::Double))
} else { } else {
// HasType has requested that we check for strict equality, and we're value_types
// checking a ValueType where that makes a difference (a numeric type). };
let val_column = QualifiedAlias::new(table, DatomsColumn::Value).to_column(); let constraints = types.into_iter().map(|ty| {
Constraint::And { let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
constraints: vec![ let loose = Constraint::equal(type_column, ColumnOrExpression::Integer(ty.value_type_tag()));
loose, if !strict || (ty != ValueType::Long && ty != ValueType::Double) {
Constraint::TypeCheck { loose
value: val_column, } else {
datatype: match value_type { let val_column = QualifiedAlias::new(table.clone(), DatomsColumn::Value).to_column();
ValueType::Long => SQLDatatype::Integer, // We're handling strict equality for a numeric type, so we need to emit
ValueType::Double => SQLDatatype::Real, // a `typeof(col) = '(real|integer)'` too. This should happen a maximum of
_ => unreachable!() // once per `HasTypes`
Constraint::And {
constraints: vec![
loose,
Constraint::TypeCheck {
value: val_column,
datatype: match ty {
ValueType::Long => SQLDatatype::Integer,
ValueType::Double => SQLDatatype::Real,
_ => unreachable!()
}
} }
} ]
] }
} }
} }).collect::<Vec<_>>();
Constraint::Or { constraints }
}, },
NotExists(computed_table) => { NotExists(computed_table) => {

View file

@ -209,7 +209,7 @@ fn test_unknown_attribute_keyword_value() {
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
// Only match keywords, not strings: tag = 13. // Only match keywords, not strings: tag = 13.
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND (`datoms00`.value_type_tag = 13)");
assert_eq!(args, vec![make_arg("$v0", ":ab/yyy")]); assert_eq!(args, vec![make_arg("$v0", ":ab/yyy")]);
} }
@ -222,7 +222,7 @@ fn test_unknown_attribute_string_value() {
// We expect all_datoms because we're querying for a string. Magic, that. // We expect all_datoms because we're querying for a string. Magic, that.
// We don't want keywords etc., so tag = 10. // We don't want keywords etc., so tag = 10.
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10"); assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND (`all_datoms00`.value_type_tag = 10)");
assert_eq!(args, vec![make_arg("$v0", "horses")]); assert_eq!(args, vec![make_arg("$v0", "horses")]);
} }
@ -235,7 +235,7 @@ fn test_unknown_attribute_double_value() {
// In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to // In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to
// make sure we only match numbers. // make sure we only match numbers.
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND `datoms00`.value_type_tag = 5"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND (`datoms00`.value_type_tag = 5)");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -286,6 +286,50 @@ fn test_unknown_ident() {
assert_eq!("SELECT 1 LIMIT 0", sql); assert_eq!("SELECT 1 LIMIT 0", sql);
} }
#[test]
fn test_type_required_long() {
    // `(long ?e)` must constrain both the value type tag and the `typeof` of the value column.
    let expected = "SELECT DISTINCT `all_datoms00`.e AS `?x` \
                    FROM `all_datoms` AS `all_datoms00` \
                    WHERE ((`all_datoms00`.value_type_tag = 5 AND \
                    typeof(`all_datoms00`.v) = 'integer'))";

    let schema = Schema::default();
    let SQLQuery { sql, args } = translate(&schema, r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#);

    assert_eq!(sql, expected);
    assert_eq!(args, vec![]);
}
#[test]
fn test_type_required_double() {
    // `(double ?e)` must constrain both the value type tag and the `typeof` of the value column.
    let expected = "SELECT DISTINCT `all_datoms00`.e AS `?x` \
                    FROM `all_datoms` AS `all_datoms00` \
                    WHERE ((`all_datoms00`.value_type_tag = 5 AND \
                    typeof(`all_datoms00`.v) = 'real'))";

    let schema = Schema::default();
    let SQLQuery { sql, args } = translate(&schema, r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#);

    assert_eq!(sql, expected);
    assert_eq!(args, vec![]);
}
#[test]
fn test_type_required_boolean() {
    // `(boolean ?e)` is non-numeric, so only the value type tag is constrained.
    let expected = "SELECT DISTINCT `all_datoms00`.e AS `?x` \
                    FROM `all_datoms` AS `all_datoms00` \
                    WHERE (`all_datoms00`.value_type_tag = 1)";

    let schema = Schema::default();
    let SQLQuery { sql, args } = translate(&schema, r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#);

    assert_eq!(sql, expected);
    assert_eq!(args, vec![]);
}
#[test] #[test]
fn test_numeric_less_than_unknown_attribute() { fn test_numeric_less_than_unknown_attribute() {
let schema = Schema::default(); let schema = Schema::default();
@ -751,7 +795,7 @@ fn test_unbound_attribute_with_ground() {
`all_datoms00`.value_type_tag AS `?v_value_type_tag` \ `all_datoms00`.value_type_tag AS `?v_value_type_tag` \
FROM `all_datoms` AS `all_datoms00` \ FROM `all_datoms` AS `all_datoms00` \
WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \ WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \
`all_datoms00`.value_type_tag = 5)"); (`all_datoms00`.value_type_tag = 5))");
} }