Allow required_types to constrain a variable to be one of several types.

This commit is contained in:
Thom Chiovoloni 2018-01-18 20:50:20 -05:00
parent f3dc922571
commit c9e2ff8146
12 changed files with 211 additions and 74 deletions

View file

@ -412,6 +412,12 @@ impl ValueTypeSet {
ValueTypeSet(self.0.intersection(other.0))
}
/// Computes the set difference of `self` and `other`: every type that is a
/// member of `self` but is not a member of `other`.
pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet {
    let remaining = self.0 - other.0;
    ValueTypeSet(remaining)
}
/// Return an arbitrary type that's part of this set.
/// For a set containing a single type, this will be that type.
pub fn exemplar(&self) -> Option<ValueType> {
@ -422,6 +428,11 @@ impl ValueTypeSet {
self.0.is_subset(&other.0)
}
/// Reports whether `self` and `other` share no types at all.
pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool {
    let theirs = &other.0;
    self.0.is_disjoint(theirs)
}
/// Reports whether `vt` is a member of this set.
pub fn contains(&self, vt: ValueType) -> bool {
    let candidate = &vt;
    self.0.contains(candidate)
}
@ -433,6 +444,10 @@ impl ValueTypeSet {
/// Reports whether this set holds exactly one type.
pub fn is_unit(&self) -> bool {
    let member_count = self.0.len();
    member_count == 1
}
/// Iterates over the types that are members of this set.
pub fn iter(&self) -> ::enum_set::Iter<ValueType> {
    let members = &self.0;
    members.iter()
}
}
impl IntoIterator for ValueTypeSet {

View file

@ -218,7 +218,7 @@ pub struct ConjoiningClauses {
pub extracted_types: BTreeMap<Variable, QualifiedAlias>,
/// Map of variables to the set of type requirements we have for them.
required_types: BTreeMap<Variable, ValueType>,
required_types: BTreeMap<Variable, ValueTypeSet>,
}
impl PartialEq for ConjoiningClauses {
@ -551,17 +551,27 @@ impl ConjoiningClauses {
}
}
pub fn add_type_requirement(&mut self, var: Variable, ty: ValueType) {
if let Some(existing) = self.required_types.insert(var.clone(), ty) {
// If we already have a required type for `var`, we're empty.
if existing != ty {
self.mark_known_empty(EmptyBecause::TypeMismatch {
var: var.clone(),
existing: ValueTypeSet::of_one(existing),
desired: ValueTypeSet::of_one(ty)
});
}
/// Constrain `var` to be one of the types in `types`.
///
/// The requirement recorded for `var` when this function returns is the
/// intersection of `types` with whatever requirement was recorded before
/// the call. A variable with no recorded requirement is treated as
/// "any type", so the first call simply records `types`.
///
/// If that intersection is empty (no type can satisfy every requirement),
/// `mark_known_empty` is called with a `TypeMismatch` naming the prior
/// and the requested sets. The (empty) intersection is still recorded.
pub fn add_type_requirement(&mut self, var: Variable, types: ValueTypeSet) {
    // No recorded requirement is equivalent to "may be any type".
    let prior = match self.required_types.get(&var) {
        Some(recorded) => recorded.clone(),
        None => ValueTypeSet::any(),
    };

    let narrowed = types.intersection(&prior);
    if narrowed.is_empty() {
        // `var` is cloned because it is still needed as the map key below.
        self.mark_known_empty(EmptyBecause::TypeMismatch {
            var: var.clone(),
            existing: prior,
            desired: types,
        });
    }

    self.required_types.insert(var, narrowed);
}
/// Like `constrain_var_to_type` but in reverse: this expands the set of types
@ -872,34 +882,56 @@ impl ConjoiningClauses {
}
pub fn process_required_types(&mut self) -> Result<()> {
if self.empty_because.is_some() {
return Ok(())
}
// We can't call `mark_known_empty` inside the loop since it would be a
// mutable borrow on self while we're iterating over `self.required_types`.
// Doing it like this avoids needing to copy `self.required_types`.
let mut empty_because: Option<EmptyBecause> = None;
for (var, &ty) in self.required_types.iter() {
if let Some(&already_known) = self.known_types.get(var) {
if already_known.exemplar() == Some(ty) {
// If we're already certain the type and the constraint are
// the same, then there's no need to constrain anything.
continue;
}
if !already_known.contains(ty) && empty_because.is_none() {
for (var, types) in self.required_types.iter() {
if let Some(already_known) = self.known_types.get(var) {
if already_known.is_disjoint(types) {
// If we know the constraint can't be one of the types
// the variable could take, then we know we're empty.
empty_because = Some(EmptyBecause::TypeMismatch {
var: var.clone(),
existing: already_known,
desired: ValueTypeSet::of_one(ty)
existing: already_known.clone(),
desired: types.clone(),
});
break;
}
if already_known.is_subset(types) {
// TODO: I'm not convinced that we can do nothing here.
//
// Consider `[:find ?x ?v :where [_ _ ?v] [(> ?v 10)] [?x :foo/long ?v]]`.
//
// That will produce SQL like:
//
// ```
// SELECT datoms01.e AS `?x`, datoms00.v AS `?v`
// FROM datoms datoms00, datoms01
// WHERE datoms00.v > 10
// AND datoms01.v = datoms00.v
// AND datoms01.value_type_tag = datoms00.value_type_tag
// AND datoms01.a = 65537
// ```
//
// Which is not optimal — the left side of the join will
// produce lots of spurious bindings for datoms00.v.
//
// See https://github.com/mozilla/mentat/issues/520, and
// https://github.com/mozilla/mentat/issues/293.
continue;
}
}
let qa = self.extracted_types
.get(&var)
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
self.wheres.add_intersection(ColumnConstraint::HasType {
.get(&var)
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
self.wheres.add_intersection(ColumnConstraint::HasTypes {
value: qa.0.clone(),
value_type: ty,
value_types: *types,
strict: true,
});
}

View file

@ -53,10 +53,21 @@ impl ConjoiningClauses {
template.apply_clause(&schema, clause)?;
}
template.expand_column_bindings();
template.prune_extracted_types();
template.process_required_types()?;
if template.is_known_empty() {
return Ok(());
}
template.expand_column_bindings();
if template.is_known_empty() {
return Ok(());
}
template.prune_extracted_types();
if template.is_known_empty() {
return Ok(());
}
template.process_required_types()?;
if template.is_known_empty() {
return Ok(());
}

View file

@ -53,7 +53,7 @@ impl ConjoiningClauses {
/// There are several kinds of predicates in our Datalog:
/// - A limited set of binary comparison operators: < > <= >= !=.
/// These are converted into SQLite binary comparisons and some type constraints.
/// - A set of type requirements constraining their argument to be a specific ValueType
/// - A set of type requirements constraining their argument to be a specific ValueType.
/// - In the future, some predicates that are implemented via function calls in SQLite.
///
/// At present we have implemented only the five built-in comparison binary operators.
@ -83,7 +83,7 @@ impl ConjoiningClauses {
let mut args = pred.args.into_iter();
if let FnArg::Variable(v) = args.next().unwrap() {
self.add_type_requirement(v, ty);
self.add_type_requirement(v, ValueTypeSet::of_one(ty));
Ok(())
} else {
bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0))

View file

@ -334,9 +334,9 @@ pub enum ColumnConstraint {
left: QueryValue,
right: QueryValue,
},
HasType {
HasTypes {
value: TableAlias,
value_type: ValueType,
value_types: ValueTypeSet,
strict: bool,
},
NotExists(ComputedTable),
@ -345,7 +345,11 @@ pub enum ColumnConstraint {
impl ColumnConstraint {
pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint {
ColumnConstraint::HasType { value, value_type, strict: false }
ColumnConstraint::HasTypes {
value,
value_types: ValueTypeSet::of_one(value_type),
strict: false
}
}
}
@ -461,14 +465,20 @@ impl Debug for ColumnConstraint {
write!(f, "{:?} MATCHES {:?}", qa, thing)
},
&HasType { ref value, value_type, strict } => {
write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
if strict && value_type == ValueType::Double || value_type == ValueType::Long {
write!(f, " AND typeof({:?}) = '{:?}')", value,
if value_type == ValueType::Double { "real" } else { "integer" })
} else {
write!(f, ")")
&HasTypes { ref value, ref value_types, strict } => {
    // Debug-only rendering: one parenthesised clause per candidate type,
    // each followed by " OR ", terminated by a constant-false `0` so the
    // trailing " OR " is well-formed and an empty set reads as false.
    // (A terminating `1` would make the printed disjunction a tautology.)
    write!(f, "(")?;
    for value_type in value_types.iter() {
        write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
        // `&&` binds tighter than `||`, so the numeric test is computed
        // separately; otherwise `Long` would print the `typeof` clause
        // even when `strict` is false.
        let is_numeric = value_type == ValueType::Double || value_type == ValueType::Long;
        if strict && is_numeric {
            let sql_type = if value_type == ValueType::Double { "real" } else { "integer" };
            // `{}` rather than `{:?}`: Debug-formatting a `&str` would
            // embed double quotes inside the single-quoted SQL literal.
            write!(f, " AND typeof({:?}) = '{}')", value, sql_type)?;
        } else {
            write!(f, ")")?;
        }
        write!(f, " OR ")?;
    }
    write!(f, "0)")
},
&NotExists(ref ct) => {
write!(f, "NOT EXISTS {:?}", ct)

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer;
extern crate mentat_query_parser;
pub mod utils;
mod utils;
use mentat_core::{
Attribute,

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer;
extern crate mentat_query_parser;
pub mod utils;
mod utils;
use std::collections::BTreeMap;

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer;
extern crate mentat_query_parser;
pub mod utils;
mod utils;
use mentat_core::{
Attribute,

View file

@ -13,7 +13,7 @@ extern crate mentat_query;
extern crate mentat_query_algebrizer;
extern crate mentat_query_parser;
pub mod utils;
mod utils;
use utils::{
alg,

View file

@ -8,6 +8,11 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// This is required to prevent warnings about unused functions in this file just
// because it's unused in a single file (tests that don't use every function in
// this module will get warnings otherwise).
#![allow(dead_code)]
use mentat_core::{
Attribute,
Entid,
@ -31,10 +36,7 @@ use mentat_query_algebrizer::{
QueryInputs,
};
// Common utility functions used in multiple test files. Note: Import this with
// `pub mod utils` (not `mod utils`), or you'll get spurious unused function
// warnings when functions exist in this file but are only used by modules that
// don't import with `pub` (yes, this is annoying).
// Common utility functions used in multiple test files.
// These are helpers that tests use to build Schema instances.
pub fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) {
@ -95,4 +97,3 @@ pub fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
let parsed = parse_find_string(input).expect("query input to have parsed");
algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc
}

View file

@ -12,6 +12,7 @@ use mentat_core::{
SQLValueType,
TypedValue,
ValueType,
ValueTypeSet,
};
use mentat_query::Limit;
@ -158,30 +159,53 @@ impl ToConstraint for ColumnConstraint {
right: right.into(),
}
},
HasType { value: table, value_type, strict } => {
let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
let loose = Constraint::equal(type_column,
ColumnOrExpression::Integer(value_type.value_type_tag()));
if !strict || (value_type != ValueType::Long && value_type != ValueType::Double) {
loose
HasTypes { value: table, value_types, strict: strict_requested } => {
// If strict mode checking is on, and we need to check exactly 1
// (not both or neither) of ValueType::Double and ValueType::Long,
// we emit a Constraint::TypeCheck.
let num_numeric_checks = (value_types.contains(ValueType::Double) as i32) +
(value_types.contains(ValueType::Long) as i32);
let strict = strict_requested && num_numeric_checks == 1;
let types = if !strict && num_numeric_checks == 2 {
// If we aren't operating in strict mode (either because it
// wasn't requested, or because it doesn't make a difference),
// and both ValueType::Double and ValueType::Long are being
// checked, we remove the test for one of them, as they're
// represented using the same value type tag (we choose to
// remove the check for ValueType::Double, but it's an
// arbitrary choice)
value_types.difference(&ValueTypeSet::of_one(ValueType::Double))
} else {
// HasType has requested that we check for strict equality, and we're
// checking a ValueType where that makes a difference (a numeric type).
let val_column = QualifiedAlias::new(table, DatomsColumn::Value).to_column();
Constraint::And {
constraints: vec![
loose,
Constraint::TypeCheck {
value: val_column,
datatype: match value_type {
ValueType::Long => SQLDatatype::Integer,
ValueType::Double => SQLDatatype::Real,
_ => unreachable!()
value_types
};
let constraints = types.into_iter().map(|ty| {
let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
let loose = Constraint::equal(type_column, ColumnOrExpression::Integer(ty.value_type_tag()));
if !strict || (ty != ValueType::Long && ty != ValueType::Double) {
loose
} else {
let val_column = QualifiedAlias::new(table.clone(), DatomsColumn::Value).to_column();
// We're handling strict equality for a numeric type, so we need to emit
// a `typeof(col) = '(real|integer)'` too. This should happen a maximum of
// once per `HasTypes`
Constraint::And {
constraints: vec![
loose,
Constraint::TypeCheck {
value: val_column,
datatype: match ty {
ValueType::Long => SQLDatatype::Integer,
ValueType::Double => SQLDatatype::Real,
_ => unreachable!()
}
}
}
]
]
}
}
}
}).collect::<Vec<_>>();
Constraint::Or { constraints }
},
NotExists(computed_table) => {

View file

@ -209,7 +209,7 @@ fn test_unknown_attribute_keyword_value() {
let SQLQuery { sql, args } = translate(&schema, query);
// Only match keywords, not strings: tag = 13.
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND (`datoms00`.value_type_tag = 13)");
assert_eq!(args, vec![make_arg("$v0", ":ab/yyy")]);
}
@ -222,7 +222,7 @@ fn test_unknown_attribute_string_value() {
// We expect all_datoms because we're querying for a string. Magic, that.
// We don't want keywords etc., so tag = 10.
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10");
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND (`all_datoms00`.value_type_tag = 10)");
assert_eq!(args, vec![make_arg("$v0", "horses")]);
}
@ -235,7 +235,7 @@ fn test_unknown_attribute_double_value() {
// In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to
// make sure we only match numbers.
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND `datoms00`.value_type_tag = 5");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND (`datoms00`.value_type_tag = 5)");
assert_eq!(args, vec![]);
}
@ -286,6 +286,50 @@ fn test_unknown_ident() {
assert_eq!("SELECT 1 LIMIT 0", sql);
}
#[test]
fn test_type_required_long() {
    // `(long ?e)` is expected to produce both the tag check (5) and a
    // `typeof(...) = 'integer'` check on the value column.
    let expected_sql = "SELECT DISTINCT `all_datoms00`.e AS `?x` \
                        FROM `all_datoms` AS `all_datoms00` \
                        WHERE ((`all_datoms00`.value_type_tag = 5 AND \
                        typeof(`all_datoms00`.v) = 'integer'))";

    let schema = Schema::default();
    let translated = translate(&schema, r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#);

    assert_eq!(translated.sql, expected_sql);
    assert_eq!(translated.args, vec![]);
}
#[test]
fn test_type_required_double() {
    // `(double ?e)` is expected to produce both the tag check (5) and a
    // `typeof(...) = 'real'` check on the value column.
    let expected_sql = "SELECT DISTINCT `all_datoms00`.e AS `?x` \
                        FROM `all_datoms` AS `all_datoms00` \
                        WHERE ((`all_datoms00`.value_type_tag = 5 AND \
                        typeof(`all_datoms00`.v) = 'real'))";

    let schema = Schema::default();
    let translated = translate(&schema, r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#);

    assert_eq!(translated.sql, expected_sql);
    assert_eq!(translated.args, vec![]);
}
#[test]
fn test_type_required_boolean() {
    // `(boolean ?e)` is expected to produce only a tag check (1), with no
    // additional `typeof` clause.
    let expected_sql = "SELECT DISTINCT `all_datoms00`.e AS `?x` \
                        FROM `all_datoms` AS `all_datoms00` \
                        WHERE (`all_datoms00`.value_type_tag = 1)";

    let schema = Schema::default();
    let translated = translate(&schema, r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#);

    assert_eq!(translated.sql, expected_sql);
    assert_eq!(translated.args, vec![]);
}
#[test]
fn test_numeric_less_than_unknown_attribute() {
let schema = Schema::default();
@ -751,7 +795,7 @@ fn test_unbound_attribute_with_ground() {
`all_datoms00`.value_type_tag AS `?v_value_type_tag` \
FROM `all_datoms` AS `all_datoms00` \
WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \
`all_datoms00`.value_type_tag = 5)");
(`all_datoms00`.value_type_tag = 5))");
}