Support input bindings in ConjoiningClauses. r=nalexander

This commit is contained in:
Richard Newman 2017-03-09 14:59:11 -08:00
parent 914902cf9e
commit 6109a63249
2 changed files with 218 additions and 47 deletions

View file

@ -18,7 +18,11 @@ use std::fmt::{
Formatter,
Result,
};
use std::collections::BTreeMap;
use std::collections::{
BTreeMap,
BTreeSet,
};
use std::collections::btree_map::Entry;
use self::mentat_core::{
@ -198,6 +202,7 @@ impl Debug for ColumnConstraint {
pub struct ConjoiningClauses {
/// `true` if this set of clauses cannot yield results in the context of the current schema.
pub is_known_empty: bool,
pub empty_because: Option<EmptyBecause>,
/// A function used to generate an alias for a table -- e.g., from "datoms" to "datoms123".
aliaser: TableAliaser,
@ -209,25 +214,82 @@ pub struct ConjoiningClauses {
pub wheres: Vec<ColumnConstraint>,
/// A map from var to qualified columns. Used to project.
pub bindings: BTreeMap<Variable, Vec<QualifiedAlias>>,
pub column_bindings: BTreeMap<Variable, Vec<QualifiedAlias>>,
/// A list of variables mentioned in the enclosing query's :in clause. These must all be bound
/// before the query can be executed. TODO: clarify what this means for nested CCs.
pub input_variables: BTreeSet<Variable>,
/// In some situations -- e.g., when a query is being run only once -- we know in advance the
/// values bound to some or all variables. These can be substituted directly when the query is
/// algebrized.
///
/// Value bindings must agree with `known_types`. If you write a query like
/// ```edn
/// [:find ?x :in $ ?val :where [?x :foo/int ?val]]
/// ```
///
/// and for `?val` provide `TypedValue::String("foo".to_string())`, the query will be known at
/// algebrizing time to be empty.
value_bindings: BTreeMap<Variable, TypedValue>,
/// A map from var to type. Whenever a var maps unambiguously to two different types, it cannot
/// yield results, so we don't represent that case here. If a var isn't present in the map, it
/// means that its type is not known in advance.
pub known_types: BTreeMap<Variable, ValueType>,
/// A mapping, similar to `bindings`, but used to pull type tags out of the store at runtime.
/// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime.
/// If a var isn't present in `known_types`, it should be present here.
extracted_types: BTreeMap<Variable, QualifiedAlias>,
}
/// The reason a `ConjoiningClauses` was marked as known-empty.
///
/// A value of this type is handed to `mark_known_empty`, which stores it in
/// `empty_because`. The `Debug` implementation renders each variant as a
/// human-readable sentence.
pub enum EmptyBecause {
/// A variable was required to have two different types.
/// Fields, in order: the variable, its existing type, the desired type.
TypeMismatch(Variable, ValueType, ValueType),
/// A keyword used as an ident in a pattern could not be resolved.
UnresolvedIdent(NamespacedKeyword),
/// The keyword resolved, but does not name an attribute.
InvalidAttributeIdent(NamespacedKeyword),
/// The entid was used in attribute position but is not an attribute.
InvalidAttributeEntid(Entid),
/// A constant value is not congruent with the attribute's value type.
/// Fields, in order: the attribute's value type, the offending value.
ValueTypeMismatch(ValueType, TypedValue),
/// No table could be determined for the pattern.
AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO
}
impl Debug for EmptyBecause {
    /// Writes a one-line, human-readable explanation of why a set of clauses
    /// is known to be empty.
    fn fmt(&self, f: &mut Formatter) -> Result {
        use self::EmptyBecause::*;

        // Dereference once and bind every payload by reference, so nothing is
        // moved out of the borrowed enum.
        match *self {
            TypeMismatch(ref var, ref existing, ref desired) =>
                write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}",
                       var, desired, existing),
            UnresolvedIdent(ref kw) =>
                write!(f, "Couldn't resolve keyword {}", kw),
            InvalidAttributeIdent(ref kw) =>
                write!(f, "{} does not name an attribute", kw),
            InvalidAttributeEntid(ref entid) =>
                write!(f, "{} is not an attribute", entid),
            ValueTypeMismatch(ref value_type, ref typed_value) =>
                write!(f, "Type mismatch: {:?} doesn't match attribute type {:?}",
                       typed_value, value_type),
            AttributeLookupFailed =>
                f.write_str("Attribute lookup failed"),
        }
    }
}
impl Debug for ConjoiningClauses {
fn fmt(&self, fmt: &mut Formatter) -> Result {
fmt.debug_struct("ConjoiningClauses")
.field("is_known_empty", &self.is_known_empty)
.field("from", &self.from)
.field("wheres", &self.wheres)
.field("bindings", &self.bindings)
.field("column_bindings", &self.column_bindings)
.field("input_variables", &self.input_variables)
.field("value_bindings", &self.value_bindings)
.field("known_types", &self.known_types)
.field("extracted_types", &self.extracted_types)
.finish()
@ -239,10 +301,13 @@ impl Default for ConjoiningClauses {
fn default() -> ConjoiningClauses {
ConjoiningClauses {
is_known_empty: false,
empty_because: None,
aliaser: default_table_aliaser(),
from: vec![],
wheres: vec![],
bindings: BTreeMap::new(),
input_variables: BTreeSet::new(),
column_bindings: BTreeMap::new(),
value_bindings: BTreeMap::new(),
known_types: BTreeMap::new(),
extracted_types: BTreeMap::new(),
}
@ -250,22 +315,53 @@ impl Default for ConjoiningClauses {
}
impl ConjoiningClauses {
    /// Builds a `ConjoiningClauses` whose variables in `bindings` already have
    /// known values.
    ///
    /// Every bound variable is also recorded in `known_types` with the type of
    /// its value; all remaining fields take their `Default` values.
    fn with_value_bindings(bindings: BTreeMap<Variable, TypedValue>) -> ConjoiningClauses {
        // Work out the type of each input binding before the map is moved
        // into the struct.
        let mut input_types = BTreeMap::new();
        for (var, value) in bindings.iter() {
            input_types.insert(var.clone(), value.value_type());
        }

        ConjoiningClauses {
            value_bindings: bindings,
            known_types: input_types,
            ..Default::default()
        }
    }
}
impl ConjoiningClauses {
/// Returns a clone of the value bound to `var` in `value_bindings`, or `None`
/// if no value was supplied for it.
fn bound_value(&self, var: &Variable) -> Option<TypedValue> {
    match self.value_bindings.get(var) {
        Some(value) => Some(value.clone()),
        None => None,
    }
}
/// Associates `var` with `column` of `table`.
///
/// If a value is already bound to `var` (see `bound_value`), the column is
/// constrained to that constant and no column binding is recorded. Otherwise the
/// qualified alias is appended to the variable's list of column bindings, and,
/// when the variable's type still has to be read from the store, the alias's
/// type-tag column is recorded in `extracted_types`.
///
/// NOTE(review): this span comes from a diff rendering in which superseded and
/// replacement lines appear side by side (two `let alias` statements, two
/// condition lists for `needs_type_extraction`, and both `self.bindings` and
/// `self.column_bindings`) -- confirm against the committed file which lines apply.
pub fn bind_column_to_var(&mut self, table: TableAlias, column: DatomsColumn, var: Variable) {
let alias = QualifiedAlias(table, column);
// Do we have an external binding for this?
if let Some(bound_val) = self.bound_value(&var) {
// Great! Use that instead.
self.constrain_column_to_constant(table, column, bound_val);
return;
}
// Will we have an external binding for this?
// If so, we don't need to extract its type. We'll know it later.
let late_binding = self.input_variables.contains(&var);
// If this is a value, and we don't already know its type or where
// to get its type, record that we can get it from this table.
let needs_type_extraction =
alias.is_value() &&
!self.known_types.contains_key(&var) &&
!self.extracted_types.contains_key(&var);
!late_binding && // Never need to extract for bound vars.
column == DatomsColumn::Value && // Never need to extract types for refs.
!self.known_types.contains_key(&var) && // We know the type!
!self.extracted_types.contains_key(&var); // We're already extracting the type.
let alias = QualifiedAlias(table, column);
// If we subsequently find out its type, we'll remove this later -- see
// the removal in `constrain_var_to_type`.
if needs_type_extraction {
self.extracted_types.insert(var.clone(), alias.for_type_tag());
}
// Record this column as one of the places where `var` appears.
self.bindings.entry(var).or_insert(vec![]).push(alias);
self.column_bindings.entry(var).or_insert(vec![]).push(alias);
}
pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) {
@ -285,9 +381,9 @@ impl ConjoiningClauses {
}
/// Constrains the var if there's no existing type.
/// Returns `false` if it's impossible for this type to apply (because there's a conflicting
/// type already known).
fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) -> bool {
/// Marks as known-empty if it's impossible for this type to apply because there's a conflicting
/// type already known.
fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) {
// If this variable now has a known attribute, we can unhook extracted types for
// any other instances of that variable.
// For example, given
@ -300,17 +396,18 @@ impl ConjoiningClauses {
// the second pattern we can avoid that.
self.extracted_types.remove(&variable);
// Is there an existing binding for this variable?
let types_entry = self.known_types.entry(variable);
match types_entry {
// Is there an existing mapping for this variable?
// Any known inputs have already been added to known_types, and so if they conflict we'll
// spot it here.
if let Some(existing) = self.known_types.get(&variable).cloned() {
// If so, the types must match.
Entry::Occupied(entry) =>
*entry.get() == this_type,
// If not, record the one we just determined.
Entry::Vacant(entry) => {
entry.insert(this_type);
true
if existing != this_type {
self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type));
}
} else {
// If not, record the one we just determined.
self.known_types.insert(variable, this_type);
}
}
@ -331,15 +428,14 @@ impl ConjoiningClauses {
// Ident or attribute resolution errors (the only other check we need to do) will be done
// by the caller.
if let &PatternNonValuePlace::Variable(ref v) = value {
if !self.constrain_var_to_type(v.clone(), ValueType::Ref) {
self.mark_known_empty("Couldn't constrain var to Ref.");
}
self.constrain_var_to_type(v.clone(), ValueType::Ref)
}
}
/// Flags this set of clauses as unable to yield results and records why.
///
/// Sets `is_known_empty`, prints the reason to stdout, and stores the reason in
/// `empty_because`.
///
/// NOTE(review): two signatures and two `println!` lines appear here because this
/// span comes from a diff rendering; the `&str` signature and the `"{}"` print
/// look like the superseded versions -- confirm against the committed file.
fn mark_known_empty(&mut self, why: &str) {
fn mark_known_empty(&mut self, why: EmptyBecause) {
self.is_known_empty = true;
println!("{}", why); // TODO: proper logging.
println!("CC known empty: {:?}.", &why); // TODO: proper logging.
// Keep the reason so callers can inspect it later.
self.empty_because = Some(why);
}
fn entid_for_ident<'s, 'a>(&self, schema: &'s Schema, ident: &'a NamespacedKeyword) -> Option<Entid> {
@ -433,7 +529,7 @@ impl ConjoiningClauses {
/// Expansions.
impl ConjoiningClauses {
/// Take the contents of `bindings` and generate inter-constraints for the appropriate
/// Take the contents of `column_bindings` and generate inter-constraints for the appropriate
/// columns into `wheres`.
///
/// For example, a bindings map associating a var to three places in the query, like
@ -448,8 +544,8 @@ impl ConjoiningClauses {
/// datoms12.e = datoms13.v
/// datoms12.e = datoms14.e
/// ```
pub fn expand_bindings(&mut self) {
for cols in self.bindings.values() {
pub fn expand_column_bindings(&mut self) {
for cols in self.column_bindings.values() {
if cols.len() > 1 {
let ref primary = cols[0];
let secondaries = cols.iter().skip(1);
@ -541,7 +637,7 @@ impl ConjoiningClauses {
match pattern.entity {
PatternNonValuePlace::Placeholder =>
// Placeholders don't contribute any bindings, nor do
// Placeholders don't contribute any column bindings, nor do
// they constrain the query -- there's no need to produce
// IS NOT NULL, because we don't store nulls in our schema.
(),
@ -554,7 +650,7 @@ impl ConjoiningClauses {
self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid)
} else {
// A resolution failure means we're done here.
self.mark_known_empty("Entity ident didn't resolve.");
self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone()));
return;
}
}
@ -569,7 +665,7 @@ impl ConjoiningClauses {
if !schema.is_attribute(entid) {
// Furthermore, that entid must resolve to an attribute. If it doesn't, this
// query is meaningless.
self.mark_known_empty("Attribute entid isn't an attribute.");
self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(entid));
return;
}
self.constrain_attribute(col.clone(), entid)
@ -579,12 +675,12 @@ impl ConjoiningClauses {
self.constrain_attribute(col.clone(), entid);
if !schema.is_attribute(entid) {
self.mark_known_empty("Attribute ident isn't an attribute.");
self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(ident.clone()));
return;
}
} else {
// A resolution failure means we're done here.
self.mark_known_empty("Attribute ident didn't resolve.");
self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone()));
return;
}
}
@ -605,9 +701,8 @@ impl ConjoiningClauses {
if let Some(this_type) = value_type {
// Wouldn't it be nice if we didn't need to clone in the found case?
// It doesn't matter too much: collisions won't be too frequent.
if !self.constrain_var_to_type(v.clone(), this_type) {
// The types don't match. This pattern cannot succeed.
self.mark_known_empty("Value types don't match.");
self.constrain_var_to_type(v.clone(), this_type);
if self.is_known_empty {
return;
}
}
@ -647,7 +742,7 @@ impl ConjoiningClauses {
} else {
// A resolution failure means we're done here: this attribute must have an
// entity value.
self.mark_known_empty("Value ident didn't resolve.");
self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone()));
return;
}
} else {
@ -661,7 +756,11 @@ impl ConjoiningClauses {
let typed_value = c.clone().into_typed_value();
if !typed_value.is_congruent_with(value_type) {
// If the attribute and its value don't match, the pattern must fail.
self.mark_known_empty("Value constant not congruent with attribute type.");
// We can never have a congruence failure if `value_type` is `None`, so we
// forcibly unwrap here.
let value_type = value_type.expect("Congruence failure but couldn't unwrap");
let why = EmptyBecause::ValueTypeMismatch(value_type, typed_value);
self.mark_known_empty(why);
return;
}
@ -707,7 +806,8 @@ impl ConjoiningClauses {
// We didn't determine a table, likely because there was a mismatch
// between an attribute and a value.
// We know we cannot return a result, so we short-circuit here.
self.mark_known_empty("Table aliaser couldn't determine a table.");
self.mark_known_empty(EmptyBecause::AttributeLookupFailed);
return;
}
}
}
@ -793,7 +893,7 @@ mod testing {
assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref);
// ?x is bound to datoms0.e.
assert_eq!(cc.bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
// Our 'where' clauses are two:
// - datoms0.a = 99
@ -831,7 +931,7 @@ mod testing {
assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref);
// ?x is bound to datoms0.e.
assert_eq!(cc.bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
// Our 'where' clauses are two:
// - datoms0.v = true
@ -877,8 +977,8 @@ mod testing {
tx: PatternNonValuePlace::Placeholder,
});
// Finally, expand bindings to get the overlaps for ?x.
cc.expand_bindings();
// Finally, expand column bindings to get the overlaps for ?x.
cc.expand_column_bindings();
println!("{:#?}", cc);
@ -898,7 +998,7 @@ mod testing {
assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref);
// ?x is bound to datoms0.e and datoms1.e.
assert_eq!(cc.bindings.get(&x).unwrap(),
assert_eq!(cc.column_bindings.get(&x).unwrap(),
&vec![
d0_e.clone(),
d1_e.clone(),
@ -917,4 +1017,75 @@ mod testing {
]);
}
/// A variable with a supplied value is substituted directly into the `where`
/// constraints and gets no column binding of its own.
#[test]
fn test_value_bindings() {
    // Schema: `:foo/bar` is attribute 99, holding booleans.
    let foo_bar = NamespacedKeyword::new("foo", "bar");
    let mut schema = Schema::default();
    associate_ident(&mut schema, foo_bar.clone(), 99);
    add_attribute(&mut schema, 99, Attribute {
        value_type: ValueType::Boolean,
        ..Default::default()
    });

    let x = Variable(PlainSymbol::new("?x"));
    let y = Variable(PlainSymbol::new("?y"));

    // Supply `true` for ?y up front.
    let mut inputs: BTreeMap<Variable, TypedValue> = BTreeMap::new();
    inputs.insert(y.clone(), TypedValue::Boolean(true));
    let mut cc = ConjoiningClauses::with_value_bindings(inputs);

    // [?x :foo/bar ?y]
    let pattern = Pattern {
        source: None,
        entity: PatternNonValuePlace::Variable(x.clone()),
        attribute: PatternNonValuePlace::Ident(foo_bar),
        value: PatternValuePlace::Variable(y.clone()),
        tx: PatternNonValuePlace::Placeholder,
    };
    cc.apply_pattern(&schema, &pattern);

    let table = "datoms00".to_string();
    let d0_e = QualifiedAlias(table.clone(), DatomsColumn::Entity);
    let d0_a = QualifiedAlias(table.clone(), DatomsColumn::Attribute);
    let d0_v = QualifiedAlias(table, DatomsColumn::Value);

    // ?y has been expanded into `true`.
    let expected_wheres = vec![
        ColumnConstraint::EqualsEntity(d0_a, 99),
        ColumnConstraint::EqualsValue(d0_v, TypedValue::Boolean(true)),
    ];
    assert_eq!(cc.wheres, expected_wheres);

    // There is no binding for ?y.
    assert!(!cc.column_bindings.contains_key(&y));

    // ?x is bound to the entity.
    let x_columns = cc.column_bindings.get(&x).unwrap();
    assert_eq!(x_columns, &vec![d0_e.clone()]);
}
/// Supplying a value whose type conflicts with the attribute's type makes the
/// clauses known-empty.
#[test]
fn test_value_bindings_type_disagreement() {
    // Schema: `:foo/bar` is attribute 99, holding booleans.
    let foo_bar = NamespacedKeyword::new("foo", "bar");
    let mut schema = Schema::default();
    associate_ident(&mut schema, foo_bar.clone(), 99);
    add_attribute(&mut schema, 99, Attribute {
        value_type: ValueType::Boolean,
        ..Default::default()
    });

    let x = Variable(PlainSymbol::new("?x"));
    let y = Variable(PlainSymbol::new("?y"));

    // Supply a long for ?y, although the attribute holds booleans.
    let mut inputs: BTreeMap<Variable, TypedValue> = BTreeMap::new();
    inputs.insert(y.clone(), TypedValue::Long(42));
    let mut cc = ConjoiningClauses::with_value_bindings(inputs);

    // [?x :foo/bar ?y]
    let pattern = Pattern {
        source: None,
        entity: PatternNonValuePlace::Variable(x.clone()),
        attribute: PatternNonValuePlace::Ident(foo_bar),
        value: PatternValuePlace::Variable(y.clone()),
        tx: PatternNonValuePlace::Placeholder,
    };
    cc.apply_pattern(&schema, &pattern);

    // The type of the provided binding doesn't match the type of the attribute.
    assert!(cc.is_known_empty);
}
}

View file

@ -212,7 +212,7 @@ fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
// one column in the query. If that constraint is violated it's a
// bug in our code, so it's appropriate to panic here.
let columns = query.cc
.bindings
.column_bindings
.get(var)
.expect("Every variable has a binding");