Allow required_types to constrain a variable be be one of several types.
This commit is contained in:
parent
f3dc922571
commit
c9e2ff8146
12 changed files with 211 additions and 74 deletions
|
@ -412,6 +412,12 @@ impl ValueTypeSet {
|
|||
ValueTypeSet(self.0.intersection(other.0))
|
||||
}
|
||||
|
||||
/// Returns the set difference between `self` and `other`, which is the
|
||||
/// set of items in `self` that are not in `other`.
|
||||
pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet {
|
||||
ValueTypeSet(self.0 - other.0)
|
||||
}
|
||||
|
||||
/// Return an arbitrary type that's part of this set.
|
||||
/// For a set containing a single type, this will be that type.
|
||||
pub fn exemplar(&self) -> Option<ValueType> {
|
||||
|
@ -422,6 +428,11 @@ impl ValueTypeSet {
|
|||
self.0.is_subset(&other.0)
|
||||
}
|
||||
|
||||
/// Returns true if `self` and `other` contain no items in common.
|
||||
pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool {
|
||||
self.0.is_disjoint(&other.0)
|
||||
}
|
||||
|
||||
pub fn contains(&self, vt: ValueType) -> bool {
|
||||
self.0.contains(&vt)
|
||||
}
|
||||
|
@ -433,6 +444,10 @@ impl ValueTypeSet {
|
|||
pub fn is_unit(&self) -> bool {
|
||||
self.0.len() == 1
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> ::enum_set::Iter<ValueType> {
|
||||
self.0.iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for ValueTypeSet {
|
||||
|
|
|
@ -218,7 +218,7 @@ pub struct ConjoiningClauses {
|
|||
pub extracted_types: BTreeMap<Variable, QualifiedAlias>,
|
||||
|
||||
/// Map of variables to the set of type requirements we have for them.
|
||||
required_types: BTreeMap<Variable, ValueType>,
|
||||
required_types: BTreeMap<Variable, ValueTypeSet>,
|
||||
}
|
||||
|
||||
impl PartialEq for ConjoiningClauses {
|
||||
|
@ -551,17 +551,27 @@ impl ConjoiningClauses {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn add_type_requirement(&mut self, var: Variable, ty: ValueType) {
|
||||
if let Some(existing) = self.required_types.insert(var.clone(), ty) {
|
||||
// If we already have a required type for `var`, we're empty.
|
||||
if existing != ty {
|
||||
self.mark_known_empty(EmptyBecause::TypeMismatch {
|
||||
var: var.clone(),
|
||||
existing: ValueTypeSet::of_one(existing),
|
||||
desired: ValueTypeSet::of_one(ty)
|
||||
});
|
||||
}
|
||||
/// Require that `var` be one of the types in `types`. If any existing
|
||||
/// type requirements exist for `var`, the requirement after this
|
||||
/// function returns will be the intersection of the requested types and
|
||||
/// the type requirements in place prior to calling `add_type_requirement`.
|
||||
///
|
||||
/// If the intersection will leave the variable so that it cannot be any
|
||||
/// type, we'll call mark_known_empty.
|
||||
pub fn add_type_requirement(&mut self, var: Variable, types: ValueTypeSet) {
|
||||
let existing = self.required_types.get(&var).cloned().unwrap_or(ValueTypeSet::any());
|
||||
|
||||
// We have an existing requirement. The new requirement will be
|
||||
// the intersection, but we'll mark_known_empty if that's empty.
|
||||
let intersection = types.intersection(&existing);
|
||||
if intersection.is_empty() {
|
||||
self.mark_known_empty(EmptyBecause::TypeMismatch {
|
||||
var: var.clone(),
|
||||
existing: existing,
|
||||
desired: types,
|
||||
});
|
||||
}
|
||||
self.required_types.insert(var, intersection);
|
||||
}
|
||||
|
||||
/// Like `constrain_var_to_type` but in reverse: this expands the set of types
|
||||
|
@ -872,34 +882,56 @@ impl ConjoiningClauses {
|
|||
}
|
||||
|
||||
pub fn process_required_types(&mut self) -> Result<()> {
|
||||
if self.empty_because.is_some() {
|
||||
return Ok(())
|
||||
}
|
||||
// We can't call `mark_known_empty` inside the loop since it would be a
|
||||
// mutable borrow on self while we're iterating over `self.required_types`.
|
||||
// Doing it like this avoids needing to copy `self.required_types`.
|
||||
let mut empty_because: Option<EmptyBecause> = None;
|
||||
for (var, &ty) in self.required_types.iter() {
|
||||
if let Some(&already_known) = self.known_types.get(var) {
|
||||
if already_known.exemplar() == Some(ty) {
|
||||
// If we're already certain the type and the constraint are
|
||||
// the same, then there's no need to constrain anything.
|
||||
continue;
|
||||
}
|
||||
if !already_known.contains(ty) && empty_because.is_none() {
|
||||
for (var, types) in self.required_types.iter() {
|
||||
if let Some(already_known) = self.known_types.get(var) {
|
||||
if already_known.is_disjoint(types) {
|
||||
// If we know the constraint can't be one of the types
|
||||
// the variable could take, then we know we're empty.
|
||||
empty_because = Some(EmptyBecause::TypeMismatch {
|
||||
var: var.clone(),
|
||||
existing: already_known,
|
||||
desired: ValueTypeSet::of_one(ty)
|
||||
existing: already_known.clone(),
|
||||
desired: types.clone(),
|
||||
});
|
||||
break;
|
||||
}
|
||||
if already_known.is_subset(types) {
|
||||
// TODO: I'm not convinced that we can do nothing here.
|
||||
//
|
||||
// Consider `[:find ?x ?v :where [_ _ ?v] [(> ?v 10)] [?x :foo/long ?v]]`.
|
||||
//
|
||||
// That will produce SQL like:
|
||||
//
|
||||
// ```
|
||||
// SELECT datoms01.e AS `?x`, datoms00.v AS `?v`
|
||||
// FROM datoms datoms00, datoms01
|
||||
// WHERE datoms00.v > 10
|
||||
// AND datoms01.v = datoms00.v
|
||||
// AND datoms01.value_type_tag = datoms00.value_type_tag
|
||||
// AND datoms01.a = 65537
|
||||
// ```
|
||||
//
|
||||
// Which is not optimal — the left side of the join will
|
||||
// produce lots of spurious bindings for datoms00.v.
|
||||
//
|
||||
// See https://github.com/mozilla/mentat/issues/520, and
|
||||
// https://github.com/mozilla/mentat/issues/293.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let qa = self.extracted_types
|
||||
.get(&var)
|
||||
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
|
||||
self.wheres.add_intersection(ColumnConstraint::HasType {
|
||||
.get(&var)
|
||||
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?;
|
||||
self.wheres.add_intersection(ColumnConstraint::HasTypes {
|
||||
value: qa.0.clone(),
|
||||
value_type: ty,
|
||||
value_types: *types,
|
||||
strict: true,
|
||||
});
|
||||
}
|
||||
|
|
|
@ -53,10 +53,21 @@ impl ConjoiningClauses {
|
|||
template.apply_clause(&schema, clause)?;
|
||||
}
|
||||
|
||||
template.expand_column_bindings();
|
||||
template.prune_extracted_types();
|
||||
template.process_required_types()?;
|
||||
if template.is_known_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
template.expand_column_bindings();
|
||||
if template.is_known_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
template.prune_extracted_types();
|
||||
if template.is_known_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
template.process_required_types()?;
|
||||
if template.is_known_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ impl ConjoiningClauses {
|
|||
/// There are several kinds of predicates in our Datalog:
|
||||
/// - A limited set of binary comparison operators: < > <= >= !=.
|
||||
/// These are converted into SQLite binary comparisons and some type constraints.
|
||||
/// - A set of type requirements constraining their argument to be a specific ValueType
|
||||
/// - A set of type requirements constraining their argument to be a specific ValueType.
|
||||
/// - In the future, some predicates that are implemented via function calls in SQLite.
|
||||
///
|
||||
/// At present we have implemented only the five built-in comparison binary operators.
|
||||
|
@ -83,7 +83,7 @@ impl ConjoiningClauses {
|
|||
let mut args = pred.args.into_iter();
|
||||
|
||||
if let FnArg::Variable(v) = args.next().unwrap() {
|
||||
self.add_type_requirement(v, ty);
|
||||
self.add_type_requirement(v, ValueTypeSet::of_one(ty));
|
||||
Ok(())
|
||||
} else {
|
||||
bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0))
|
||||
|
|
|
@ -334,9 +334,9 @@ pub enum ColumnConstraint {
|
|||
left: QueryValue,
|
||||
right: QueryValue,
|
||||
},
|
||||
HasType {
|
||||
HasTypes {
|
||||
value: TableAlias,
|
||||
value_type: ValueType,
|
||||
value_types: ValueTypeSet,
|
||||
strict: bool,
|
||||
},
|
||||
NotExists(ComputedTable),
|
||||
|
@ -345,7 +345,11 @@ pub enum ColumnConstraint {
|
|||
|
||||
impl ColumnConstraint {
|
||||
pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint {
|
||||
ColumnConstraint::HasType { value, value_type, strict: false }
|
||||
ColumnConstraint::HasTypes {
|
||||
value,
|
||||
value_types: ValueTypeSet::of_one(value_type),
|
||||
strict: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -461,14 +465,20 @@ impl Debug for ColumnConstraint {
|
|||
write!(f, "{:?} MATCHES {:?}", qa, thing)
|
||||
},
|
||||
|
||||
&HasType { ref value, value_type, strict } => {
|
||||
write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
|
||||
if strict && value_type == ValueType::Double || value_type == ValueType::Long {
|
||||
write!(f, " AND typeof({:?}) = '{:?}')", value,
|
||||
if value_type == ValueType::Double { "real" } else { "integer" })
|
||||
} else {
|
||||
write!(f, ")")
|
||||
&HasTypes { ref value, ref value_types, strict } => {
|
||||
// This is cludgey, but it's debug code.
|
||||
write!(f, "(")?;
|
||||
for value_type in value_types.iter() {
|
||||
write!(f, "({:?}.value_type_tag = {:?}", value, value_type)?;
|
||||
if strict && value_type == ValueType::Double || value_type == ValueType::Long {
|
||||
write!(f, " AND typeof({:?}) = '{:?}')", value,
|
||||
if value_type == ValueType::Double { "real" } else { "integer" })?;
|
||||
} else {
|
||||
write!(f, ")")?;
|
||||
}
|
||||
write!(f, " OR ")?;
|
||||
}
|
||||
write!(f, "1)")
|
||||
},
|
||||
&NotExists(ref ct) => {
|
||||
write!(f, "NOT EXISTS {:?}", ct)
|
||||
|
|
|
@ -13,7 +13,7 @@ extern crate mentat_query;
|
|||
extern crate mentat_query_algebrizer;
|
||||
extern crate mentat_query_parser;
|
||||
|
||||
pub mod utils;
|
||||
mod utils;
|
||||
|
||||
use mentat_core::{
|
||||
Attribute,
|
||||
|
|
|
@ -13,7 +13,7 @@ extern crate mentat_query;
|
|||
extern crate mentat_query_algebrizer;
|
||||
extern crate mentat_query_parser;
|
||||
|
||||
pub mod utils;
|
||||
mod utils;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ extern crate mentat_query;
|
|||
extern crate mentat_query_algebrizer;
|
||||
extern crate mentat_query_parser;
|
||||
|
||||
pub mod utils;
|
||||
mod utils;
|
||||
|
||||
use mentat_core::{
|
||||
Attribute,
|
||||
|
|
|
@ -13,7 +13,7 @@ extern crate mentat_query;
|
|||
extern crate mentat_query_algebrizer;
|
||||
extern crate mentat_query_parser;
|
||||
|
||||
pub mod utils;
|
||||
mod utils;
|
||||
|
||||
use utils::{
|
||||
alg,
|
||||
|
|
|
@ -8,6 +8,11 @@
|
|||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
// This is required to prevent warnings about unused functions in this file just
|
||||
// because it's unused in a single file (tests that don't use every function in
|
||||
// this module will get warnings otherwise).
|
||||
#![allow(dead_code)]
|
||||
|
||||
use mentat_core::{
|
||||
Attribute,
|
||||
Entid,
|
||||
|
@ -31,10 +36,7 @@ use mentat_query_algebrizer::{
|
|||
QueryInputs,
|
||||
};
|
||||
|
||||
// Common utility functions used in multiple test files. Note: Import this with
|
||||
// `pub mod utils` (not `mod utils`), or you'll get spurious unused function
|
||||
// warnings when functions exist in this file but are only used by modules that
|
||||
// don't import with `pub` (yes, this is annoying).
|
||||
// Common utility functions used in multiple test files.
|
||||
|
||||
// These are helpers that tests use to build Schema instances.
|
||||
pub fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) {
|
||||
|
@ -95,4 +97,3 @@ pub fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
|
|||
let parsed = parse_find_string(input).expect("query input to have parsed");
|
||||
algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ use mentat_core::{
|
|||
SQLValueType,
|
||||
TypedValue,
|
||||
ValueType,
|
||||
ValueTypeSet,
|
||||
};
|
||||
|
||||
use mentat_query::Limit;
|
||||
|
@ -158,30 +159,53 @@ impl ToConstraint for ColumnConstraint {
|
|||
right: right.into(),
|
||||
}
|
||||
},
|
||||
HasType { value: table, value_type, strict } => {
|
||||
let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
|
||||
let loose = Constraint::equal(type_column,
|
||||
ColumnOrExpression::Integer(value_type.value_type_tag()));
|
||||
if !strict || (value_type != ValueType::Long && value_type != ValueType::Double) {
|
||||
loose
|
||||
HasTypes { value: table, value_types, strict: strict_requested } => {
|
||||
// If strict mode checking is on, and need to check exactly 1
|
||||
// (not both or neither) of ValueType::Double and ValueType::Long
|
||||
// we emit a Constraint::TypeCheck.
|
||||
let num_numeric_checks = (value_types.contains(ValueType::Double) as i32) +
|
||||
(value_types.contains(ValueType::Long) as i32);
|
||||
let strict = strict_requested && num_numeric_checks == 1;
|
||||
|
||||
let types = if !strict && num_numeric_checks == 2 {
|
||||
// If we aren't operating in strict mode (either because it
|
||||
// wasn't requested, or because it doesn't make a difference),
|
||||
// and both ValueType::Double and ValueType::Long are being
|
||||
// checked, we remove the test for one of them, as they're
|
||||
// represented using the same value type tag (we choose to
|
||||
// remove the check for ValueType::Double, but it's an
|
||||
// arbitrary choice)
|
||||
value_types.difference(&ValueTypeSet::of_one(ValueType::Double))
|
||||
} else {
|
||||
// HasType has requested that we check for strict equality, and we're
|
||||
// checking a ValueType where that makes a difference (a numeric type).
|
||||
let val_column = QualifiedAlias::new(table, DatomsColumn::Value).to_column();
|
||||
Constraint::And {
|
||||
constraints: vec![
|
||||
loose,
|
||||
Constraint::TypeCheck {
|
||||
value: val_column,
|
||||
datatype: match value_type {
|
||||
ValueType::Long => SQLDatatype::Integer,
|
||||
ValueType::Double => SQLDatatype::Real,
|
||||
_ => unreachable!()
|
||||
value_types
|
||||
};
|
||||
let constraints = types.into_iter().map(|ty| {
|
||||
let type_column = QualifiedAlias::new(table.clone(), DatomsColumn::ValueTypeTag).to_column();
|
||||
let loose = Constraint::equal(type_column, ColumnOrExpression::Integer(ty.value_type_tag()));
|
||||
if !strict || (ty != ValueType::Long && ty != ValueType::Double) {
|
||||
loose
|
||||
} else {
|
||||
let val_column = QualifiedAlias::new(table.clone(), DatomsColumn::Value).to_column();
|
||||
// We're handling strict equality for a numeric type, so we need to emit
|
||||
// a `typeof(col) = '(real|integer)'` too. This should happen a maximum of
|
||||
// once per `HasTypes`
|
||||
Constraint::And {
|
||||
constraints: vec![
|
||||
loose,
|
||||
Constraint::TypeCheck {
|
||||
value: val_column,
|
||||
datatype: match ty {
|
||||
ValueType::Long => SQLDatatype::Integer,
|
||||
ValueType::Double => SQLDatatype::Real,
|
||||
_ => unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
Constraint::Or { constraints }
|
||||
},
|
||||
|
||||
NotExists(computed_table) => {
|
||||
|
|
|
@ -209,7 +209,7 @@ fn test_unknown_attribute_keyword_value() {
|
|||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
|
||||
// Only match keywords, not strings: tag = 13.
|
||||
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13");
|
||||
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND (`datoms00`.value_type_tag = 13)");
|
||||
assert_eq!(args, vec![make_arg("$v0", ":ab/yyy")]);
|
||||
}
|
||||
|
||||
|
@ -222,7 +222,7 @@ fn test_unknown_attribute_string_value() {
|
|||
|
||||
// We expect all_datoms because we're querying for a string. Magic, that.
|
||||
// We don't want keywords etc., so tag = 10.
|
||||
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10");
|
||||
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND (`all_datoms00`.value_type_tag = 10)");
|
||||
assert_eq!(args, vec![make_arg("$v0", "horses")]);
|
||||
}
|
||||
|
||||
|
@ -235,7 +235,7 @@ fn test_unknown_attribute_double_value() {
|
|||
|
||||
// In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to
|
||||
// make sure we only match numbers.
|
||||
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND `datoms00`.value_type_tag = 5");
|
||||
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND (`datoms00`.value_type_tag = 5)");
|
||||
assert_eq!(args, vec![]);
|
||||
}
|
||||
|
||||
|
@ -286,6 +286,50 @@ fn test_unknown_ident() {
|
|||
assert_eq!("SELECT 1 LIMIT 0", sql);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_type_required_long() {
|
||||
let schema = Schema::default();
|
||||
|
||||
let query = r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
|
||||
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
|
||||
FROM `all_datoms` AS `all_datoms00` \
|
||||
WHERE ((`all_datoms00`.value_type_tag = 5 AND \
|
||||
typeof(`all_datoms00`.v) = 'integer'))");
|
||||
|
||||
assert_eq!(args, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_type_required_double() {
|
||||
let schema = Schema::default();
|
||||
|
||||
let query = r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
|
||||
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
|
||||
FROM `all_datoms` AS `all_datoms00` \
|
||||
WHERE ((`all_datoms00`.value_type_tag = 5 AND \
|
||||
typeof(`all_datoms00`.v) = 'real'))");
|
||||
|
||||
assert_eq!(args, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_type_required_boolean() {
|
||||
let schema = Schema::default();
|
||||
|
||||
let query = r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
|
||||
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
|
||||
FROM `all_datoms` AS `all_datoms00` \
|
||||
WHERE (`all_datoms00`.value_type_tag = 1)");
|
||||
|
||||
assert_eq!(args, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_numeric_less_than_unknown_attribute() {
|
||||
let schema = Schema::default();
|
||||
|
@ -751,7 +795,7 @@ fn test_unbound_attribute_with_ground() {
|
|||
`all_datoms00`.value_type_tag AS `?v_value_type_tag` \
|
||||
FROM `all_datoms` AS `all_datoms00` \
|
||||
WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \
|
||||
`all_datoms00`.value_type_tag = 5)");
|
||||
(`all_datoms00`.value_type_tag = 5))");
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue