Ensure that variable bindings are used when selecting a table. r=nalexander,etoop
For queries like ```edn [:find ?x :where [?x _ "hello"]] [:find [?v ...] :where [_ ?a ?v]] ``` we'll query `all_datoms` to handle fulltext strings, which is expensive. If `?a` is bound, we can avoid this — resolve any keyword binding, ensure that the value is an attribute, and use the appropriate table.
This commit is contained in:
parent
dc6a7a4128
commit
70e5759b5f
1 changed files with 255 additions and 33 deletions
|
@ -176,6 +176,19 @@ impl Debug for ColumnConstraint {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trait OptionEffect<T> {
|
||||||
|
fn when_not<F: FnOnce()>(self, f: F) -> Option<T>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> OptionEffect<T> for Option<T> {
|
||||||
|
fn when_not<F: FnOnce()>(self, f: F) -> Option<T> {
|
||||||
|
if self.is_none() {
|
||||||
|
f();
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`.
|
/// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`.
|
||||||
/// The topmost form in a query is a `ConjoiningClauses`.
|
/// The topmost form in a query is a `ConjoiningClauses`.
|
||||||
///
|
///
|
||||||
|
@ -243,12 +256,14 @@ pub struct ConjoiningClauses {
|
||||||
extracted_types: BTreeMap<Variable, QualifiedAlias>,
|
extracted_types: BTreeMap<Variable, QualifiedAlias>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq)]
|
||||||
pub enum EmptyBecause {
|
pub enum EmptyBecause {
|
||||||
// Var, existing, desired.
|
// Var, existing, desired.
|
||||||
TypeMismatch(Variable, ValueType, ValueType),
|
TypeMismatch(Variable, ValueType, ValueType),
|
||||||
UnresolvedIdent(NamespacedKeyword),
|
UnresolvedIdent(NamespacedKeyword),
|
||||||
InvalidAttributeIdent(NamespacedKeyword),
|
InvalidAttributeIdent(NamespacedKeyword),
|
||||||
InvalidAttributeEntid(Entid),
|
InvalidAttributeEntid(Entid),
|
||||||
|
InvalidBinding(DatomsColumn, TypedValue),
|
||||||
ValueTypeMismatch(ValueType, TypedValue),
|
ValueTypeMismatch(ValueType, TypedValue),
|
||||||
AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO
|
AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO
|
||||||
}
|
}
|
||||||
|
@ -270,6 +285,9 @@ impl Debug for EmptyBecause {
|
||||||
&InvalidAttributeEntid(entid) => {
|
&InvalidAttributeEntid(entid) => {
|
||||||
write!(f, "{} is not an attribute", entid)
|
write!(f, "{} is not an attribute", entid)
|
||||||
},
|
},
|
||||||
|
&InvalidBinding(ref column, ref tv) => {
|
||||||
|
write!(f, "{:?} cannot name column {:?}", tv, column)
|
||||||
|
},
|
||||||
&ValueTypeMismatch(value_type, ref typed_value) => {
|
&ValueTypeMismatch(value_type, ref typed_value) => {
|
||||||
write!(f, "Type mismatch: {:?} doesn't match attribute type {:?}",
|
write!(f, "Type mismatch: {:?} doesn't match attribute type {:?}",
|
||||||
typed_value, value_type)
|
typed_value, value_type)
|
||||||
|
@ -334,11 +352,38 @@ impl ConjoiningClauses {
|
||||||
self.value_bindings.get(var).cloned()
|
self.value_bindings.get(var).cloned()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn bind_column_to_var(&mut self, table: TableAlias, column: DatomsColumn, var: Variable) {
|
pub fn bind_column_to_var(&mut self, schema: &Schema, table: TableAlias, column: DatomsColumn, var: Variable) {
|
||||||
// Do we have an external binding for this?
|
// Do we have an external binding for this?
|
||||||
if let Some(bound_val) = self.bound_value(&var) {
|
if let Some(bound_val) = self.bound_value(&var) {
|
||||||
// Great! Use that instead.
|
// Great! Use that instead.
|
||||||
|
// We expect callers to do things like bind keywords here; we need to translate these
|
||||||
|
// before they hit our constraints.
|
||||||
|
// TODO: recognize when the valueType might be a ref and also translate entids there.
|
||||||
|
if column == DatomsColumn::Value {
|
||||||
self.constrain_column_to_constant(table, column, bound_val);
|
self.constrain_column_to_constant(table, column, bound_val);
|
||||||
|
} else {
|
||||||
|
match bound_val {
|
||||||
|
TypedValue::Keyword(ref kw) => {
|
||||||
|
if let Some(entid) = self.entid_for_ident(schema, kw) {
|
||||||
|
self.constrain_column_to_entity(table, column, entid);
|
||||||
|
} else {
|
||||||
|
// Impossible.
|
||||||
|
// For attributes this shouldn't occur, because we check the binding in
|
||||||
|
// `table_for_places`/`alias_table`, and if it didn't resolve to a valid
|
||||||
|
// attribute then we should have already marked the pattern as empty.
|
||||||
|
self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone()));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
TypedValue::Ref(entid) => {
|
||||||
|
self.constrain_column_to_entity(table, column, entid);
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
// One can't bind an e, a, or tx to something other than an entity.
|
||||||
|
self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -434,6 +479,9 @@ impl ConjoiningClauses {
|
||||||
|
|
||||||
fn mark_known_empty(&mut self, why: EmptyBecause) {
|
fn mark_known_empty(&mut self, why: EmptyBecause) {
|
||||||
self.is_known_empty = true;
|
self.is_known_empty = true;
|
||||||
|
if self.empty_because.is_some() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
println!("CC known empty: {:?}.", &why); // TODO: proper logging.
|
println!("CC known empty: {:?}.", &why); // TODO: proper logging.
|
||||||
self.empty_because = Some(why);
|
self.empty_because = Some(why);
|
||||||
}
|
}
|
||||||
|
@ -466,13 +514,7 @@ impl ConjoiningClauses {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn table_for_places<'s, 'a>(&self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> Option<DatomsTable> {
|
fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a PatternValuePlace) -> Option<DatomsTable> {
|
||||||
match attribute {
|
|
||||||
// TODO: In a non-prepared context, check if a var is known by external binding, and
|
|
||||||
// choose the table accordingly, as if it weren't a variable. #279.
|
|
||||||
// TODO: In a prepared context, defer this decision until a second algebrizing phase.
|
|
||||||
// #278.
|
|
||||||
&PatternNonValuePlace::Placeholder | &PatternNonValuePlace::Variable(_) =>
|
|
||||||
// If the value is known to be non-textual, we can simply use the regular datoms
|
// If the value is known to be non-textual, we can simply use the regular datoms
|
||||||
// table (TODO: and exclude on `index_fulltext`!).
|
// table (TODO: and exclude on `index_fulltext`!).
|
||||||
//
|
//
|
||||||
|
@ -491,13 +533,52 @@ impl ConjoiningClauses {
|
||||||
DatomsTable::AllDatoms,
|
DatomsTable::AllDatoms,
|
||||||
_ =>
|
_ =>
|
||||||
DatomsTable::Datoms,
|
DatomsTable::Datoms,
|
||||||
}),
|
})
|
||||||
&PatternNonValuePlace::Entid(id) =>
|
}
|
||||||
schema.attribute_for_entid(id)
|
|
||||||
.and_then(|attribute| self.table_for_attribute_and_value(attribute, value)),
|
/// Decide which table to use for the provided attribute and value.
|
||||||
|
/// If the attribute input or value binding doesn't name an attribute, or doesn't name an
|
||||||
|
/// attribute that is congruent with the supplied value, we mark the CC as known-empty and
|
||||||
|
/// return `None`.
|
||||||
|
fn table_for_places<'s, 'a>(&mut self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> Option<DatomsTable> {
|
||||||
|
match attribute {
|
||||||
&PatternNonValuePlace::Ident(ref kw) =>
|
&PatternNonValuePlace::Ident(ref kw) =>
|
||||||
schema.attribute_for_ident(kw)
|
schema.attribute_for_ident(kw)
|
||||||
|
.when_not(|| self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(kw.clone())))
|
||||||
.and_then(|attribute| self.table_for_attribute_and_value(attribute, value)),
|
.and_then(|attribute| self.table_for_attribute_and_value(attribute, value)),
|
||||||
|
&PatternNonValuePlace::Entid(id) =>
|
||||||
|
schema.attribute_for_entid(id)
|
||||||
|
.when_not(|| self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(id)))
|
||||||
|
.and_then(|attribute| self.table_for_attribute_and_value(attribute, value)),
|
||||||
|
// TODO: In a prepared context, defer this decision until a second algebrizing phase.
|
||||||
|
// #278.
|
||||||
|
&PatternNonValuePlace::Placeholder =>
|
||||||
|
self.table_for_unknown_attribute(value),
|
||||||
|
&PatternNonValuePlace::Variable(ref v) => {
|
||||||
|
// See if we have a binding for the variable.
|
||||||
|
match self.bound_value(v) {
|
||||||
|
// TODO: In a prepared context, defer this decision until a second algebrizing phase.
|
||||||
|
// #278.
|
||||||
|
None =>
|
||||||
|
self.table_for_unknown_attribute(value),
|
||||||
|
Some(TypedValue::Ref(id)) =>
|
||||||
|
// Recurse: it's easy.
|
||||||
|
self.table_for_places(schema, &PatternNonValuePlace::Entid(id), value),
|
||||||
|
Some(TypedValue::Keyword(ref kw)) =>
|
||||||
|
// Don't recurse: avoid needing to clone the keyword.
|
||||||
|
schema.attribute_for_ident(kw)
|
||||||
|
.when_not(|| self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(kw.clone())))
|
||||||
|
.and_then(|attribute| self.table_for_attribute_and_value(attribute, value)),
|
||||||
|
Some(v) => {
|
||||||
|
// This pattern cannot match: the caller has bound a non-entity value to an
|
||||||
|
// attribute place. Return `None` and invalidate this CC.
|
||||||
|
self.mark_known_empty(EmptyBecause::InvalidBinding(DatomsColumn::Attribute,
|
||||||
|
v.clone()));
|
||||||
|
None
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -507,6 +588,7 @@ impl ConjoiningClauses {
|
||||||
/// `is_known_empty`.
|
/// `is_known_empty`.
|
||||||
fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option<SourceAlias> {
|
fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option<SourceAlias> {
|
||||||
self.table_for_places(schema, &pattern.attribute, &pattern.value)
|
self.table_for_places(schema, &pattern.attribute, &pattern.value)
|
||||||
|
.when_not(|| assert!(self.is_known_empty)) // table_for_places should have flipped this.
|
||||||
.map(|table| SourceAlias(table, (self.aliaser)(table)))
|
.map(|table| SourceAlias(table, (self.aliaser)(table)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -642,7 +724,7 @@ impl ConjoiningClauses {
|
||||||
// IS NOT NULL, because we don't store nulls in our schema.
|
// IS NOT NULL, because we don't store nulls in our schema.
|
||||||
(),
|
(),
|
||||||
PatternNonValuePlace::Variable(ref v) =>
|
PatternNonValuePlace::Variable(ref v) =>
|
||||||
self.bind_column_to_var(col.clone(), DatomsColumn::Entity, v.clone()),
|
self.bind_column_to_var(schema, col.clone(), DatomsColumn::Entity, v.clone()),
|
||||||
PatternNonValuePlace::Entid(entid) =>
|
PatternNonValuePlace::Entid(entid) =>
|
||||||
self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid),
|
self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid),
|
||||||
PatternNonValuePlace::Ident(ref ident) => {
|
PatternNonValuePlace::Ident(ref ident) => {
|
||||||
|
@ -660,7 +742,7 @@ impl ConjoiningClauses {
|
||||||
PatternNonValuePlace::Placeholder =>
|
PatternNonValuePlace::Placeholder =>
|
||||||
(),
|
(),
|
||||||
PatternNonValuePlace::Variable(ref v) =>
|
PatternNonValuePlace::Variable(ref v) =>
|
||||||
self.bind_column_to_var(col.clone(), DatomsColumn::Attribute, v.clone()),
|
self.bind_column_to_var(schema, col.clone(), DatomsColumn::Attribute, v.clone()),
|
||||||
PatternNonValuePlace::Entid(entid) => {
|
PatternNonValuePlace::Entid(entid) => {
|
||||||
if !schema.is_attribute(entid) {
|
if !schema.is_attribute(entid) {
|
||||||
// Furthermore, that entid must resolve to an attribute. If it doesn't, this
|
// Furthermore, that entid must resolve to an attribute. If it doesn't, this
|
||||||
|
@ -707,7 +789,7 @@ impl ConjoiningClauses {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.bind_column_to_var(col.clone(), DatomsColumn::Value, v.clone());
|
self.bind_column_to_var(schema, col.clone(), DatomsColumn::Value, v.clone());
|
||||||
},
|
},
|
||||||
PatternValuePlace::EntidOrInteger(i) =>
|
PatternValuePlace::EntidOrInteger(i) =>
|
||||||
// If we know the valueType, then we can determine whether this is an entid or an
|
// If we know the valueType, then we can determine whether this is an entid or an
|
||||||
|
@ -943,6 +1025,146 @@ mod testing {
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This test ensures that we do less work if we know the attribute thanks to a var lookup.
|
||||||
|
#[test]
|
||||||
|
fn test_apply_unattributed_but_bound_pattern_with_returned() {
|
||||||
|
let mut cc = ConjoiningClauses::default();
|
||||||
|
let mut schema = Schema::default();
|
||||||
|
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
|
||||||
|
add_attribute(&mut schema, 99, Attribute {
|
||||||
|
value_type: ValueType::Boolean,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
|
||||||
|
let x = Variable(PlainSymbol::new("?x"));
|
||||||
|
let a = Variable(PlainSymbol::new("?a"));
|
||||||
|
let v = Variable(PlainSymbol::new("?v"));
|
||||||
|
|
||||||
|
cc.input_variables.insert(a.clone());
|
||||||
|
cc.value_bindings.insert(a.clone(), TypedValue::Keyword(NamespacedKeyword::new("foo", "bar")));
|
||||||
|
cc.apply_pattern(&schema, &Pattern {
|
||||||
|
source: None,
|
||||||
|
entity: PatternNonValuePlace::Variable(x.clone()),
|
||||||
|
attribute: PatternNonValuePlace::Variable(a.clone()),
|
||||||
|
value: PatternValuePlace::Variable(v.clone()),
|
||||||
|
tx: PatternNonValuePlace::Placeholder,
|
||||||
|
});
|
||||||
|
|
||||||
|
// println!("{:#?}", cc);
|
||||||
|
|
||||||
|
let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity);
|
||||||
|
let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute);
|
||||||
|
|
||||||
|
assert!(!cc.is_known_empty);
|
||||||
|
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]);
|
||||||
|
|
||||||
|
// ?x must be a ref.
|
||||||
|
assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref);
|
||||||
|
|
||||||
|
// ?x is bound to datoms0.e.
|
||||||
|
assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
|
||||||
|
assert_eq!(cc.wheres, vec![
|
||||||
|
ColumnConstraint::EqualsEntity(d0_a, 99),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Queries that bind non-entity values to entity places can't return results.
|
||||||
|
#[test]
|
||||||
|
fn test_bind_the_wrong_thing() {
|
||||||
|
let mut cc = ConjoiningClauses::default();
|
||||||
|
let schema = Schema::default();
|
||||||
|
|
||||||
|
let x = Variable(PlainSymbol::new("?x"));
|
||||||
|
let a = Variable(PlainSymbol::new("?a"));
|
||||||
|
let v = Variable(PlainSymbol::new("?v"));
|
||||||
|
let hello = TypedValue::String("hello".to_string());
|
||||||
|
|
||||||
|
cc.input_variables.insert(a.clone());
|
||||||
|
cc.value_bindings.insert(a.clone(), hello.clone());
|
||||||
|
cc.apply_pattern(&schema, &Pattern {
|
||||||
|
source: None,
|
||||||
|
entity: PatternNonValuePlace::Variable(x.clone()),
|
||||||
|
attribute: PatternNonValuePlace::Variable(a.clone()),
|
||||||
|
value: PatternValuePlace::Variable(v.clone()),
|
||||||
|
tx: PatternNonValuePlace::Placeholder,
|
||||||
|
});
|
||||||
|
|
||||||
|
assert!(cc.is_known_empty);
|
||||||
|
assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(DatomsColumn::Attribute, hello));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// This test ensures that we query all_datoms if we're possibly retrieving a string.
|
||||||
|
#[test]
|
||||||
|
fn test_apply_unattributed_pattern_with_returned() {
|
||||||
|
let mut cc = ConjoiningClauses::default();
|
||||||
|
let schema = Schema::default();
|
||||||
|
|
||||||
|
let x = Variable(PlainSymbol::new("?x"));
|
||||||
|
let a = Variable(PlainSymbol::new("?a"));
|
||||||
|
let v = Variable(PlainSymbol::new("?v"));
|
||||||
|
cc.apply_pattern(&schema, &Pattern {
|
||||||
|
source: None,
|
||||||
|
entity: PatternNonValuePlace::Variable(x.clone()),
|
||||||
|
attribute: PatternNonValuePlace::Variable(a.clone()),
|
||||||
|
value: PatternValuePlace::Variable(v.clone()),
|
||||||
|
tx: PatternNonValuePlace::Placeholder,
|
||||||
|
});
|
||||||
|
|
||||||
|
// println!("{:#?}", cc);
|
||||||
|
|
||||||
|
let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity);
|
||||||
|
|
||||||
|
assert!(!cc.is_known_empty);
|
||||||
|
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]);
|
||||||
|
|
||||||
|
// ?x must be a ref.
|
||||||
|
assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref);
|
||||||
|
|
||||||
|
// ?x is bound to datoms0.e.
|
||||||
|
assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
|
||||||
|
assert_eq!(cc.wheres, vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This test ensures that we query all_datoms if we're looking for a string.
|
||||||
|
#[test]
|
||||||
|
fn test_apply_unattributed_pattern_with_string_value() {
|
||||||
|
let mut cc = ConjoiningClauses::default();
|
||||||
|
let schema = Schema::default();
|
||||||
|
|
||||||
|
let x = Variable(PlainSymbol::new("?x"));
|
||||||
|
cc.apply_pattern(&schema, &Pattern {
|
||||||
|
source: None,
|
||||||
|
entity: PatternNonValuePlace::Variable(x.clone()),
|
||||||
|
attribute: PatternNonValuePlace::Placeholder,
|
||||||
|
value: PatternValuePlace::Constant(NonIntegerConstant::Text("hello".to_string())),
|
||||||
|
tx: PatternNonValuePlace::Placeholder,
|
||||||
|
});
|
||||||
|
|
||||||
|
// println!("{:#?}", cc);
|
||||||
|
|
||||||
|
let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity);
|
||||||
|
let d0_v = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Value);
|
||||||
|
|
||||||
|
assert!(!cc.is_known_empty);
|
||||||
|
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]);
|
||||||
|
|
||||||
|
// ?x must be a ref.
|
||||||
|
assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref);
|
||||||
|
|
||||||
|
// ?x is bound to datoms0.e.
|
||||||
|
assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
|
||||||
|
|
||||||
|
// Our 'where' clauses are two:
|
||||||
|
// - datoms0.v = 'hello'
|
||||||
|
// - datoms0.value_type_tag = string
|
||||||
|
// TODO: implement expand_type_tags.
|
||||||
|
assert_eq!(cc.wheres, vec![
|
||||||
|
ColumnConstraint::EqualsValue(d0_v, TypedValue::String("hello".to_string())),
|
||||||
|
ColumnConstraint::HasType("all_datoms00".to_string(), ValueType::String),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_apply_two_patterns() {
|
fn test_apply_two_patterns() {
|
||||||
let mut cc = ConjoiningClauses::default();
|
let mut cc = ConjoiningClauses::default();
|
||||||
|
|
Loading…
Reference in a new issue