Process type predicates before other WhereClauses, to avoid querying all_datoms where possible

This commit is contained in:
Thom Chiovoloni 2018-01-19 16:09:11 -05:00
parent 89d3114bd1
commit bd48e2c741
8 changed files with 63 additions and 64 deletions

View file

@ -83,6 +83,8 @@ use validate::{
validate_or_join, validate_or_join,
}; };
use self::predicate::parse_type_predicate;
pub use self::inputs::QueryInputs; pub use self::inputs::QueryInputs;
// We do this a lot for errors. // We do this a lot for errors.
@ -366,7 +368,7 @@ impl ConjoiningClauses {
// Are we also trying to figure out the type of the value when the query runs? // Are we also trying to figure out the type of the value when the query runs?
// If so, constrain that! // If so, constrain that!
if let Some(qa) = self.extracted_types.get(&var) { if let Some(qa) = self.extracted_types.get(&var) {
self.wheres.add_intersection(ColumnConstraint::has_type(qa.0.clone(), vt)); self.wheres.add_intersection(ColumnConstraint::has_unit_type(qa.0.clone(), vt));
} }
// Finally, store the binding for future use. // Finally, store the binding for future use.
@ -406,14 +408,6 @@ impl ConjoiningClauses {
self.known_types.get(var).cloned().unwrap_or(ValueTypeSet::any()) self.known_types.get(var).cloned().unwrap_or(ValueTypeSet::any())
} }
fn required_type_set(&self, var: &Variable) -> ValueTypeSet {
self.required_types.get(var).cloned().unwrap_or(ValueTypeSet::any())
}
fn possible_type_set(&self, var: &Variable) -> ValueTypeSet {
self.known_type_set(var).intersection(&self.required_type_set(var))
}
pub fn bind_column_to_var<C: Into<Column>>(&mut self, schema: &Schema, table: TableAlias, column: C, var: Variable) { pub fn bind_column_to_var<C: Into<Column>>(&mut self, schema: &Schema, table: TableAlias, column: C, var: Variable) {
let column = column.into(); let column = column.into();
// Do we have an external binding for this? // Do we have an external binding for this?
@ -733,12 +727,13 @@ impl ConjoiningClauses {
// TODO: see if the variable is projected, aggregated, or compared elsewhere in // TODO: see if the variable is projected, aggregated, or compared elsewhere in
// the query. If it's not, we don't need to use all_datoms here. // the query. If it's not, we don't need to use all_datoms here.
&PatternValuePlace::Variable(ref v) => { &PatternValuePlace::Variable(ref v) => {
// Do we know that this variable can't be a string? If so, we don't need // If `required_types` and `known_types` don't exclude strings,
// AllDatoms. // we need to query `all_datoms`.
if !self.possible_type_set(v).contains(ValueType::String) { if self.required_types.get(v).map_or(true, |s| s.contains(ValueType::String)) &&
DatomsTable::Datoms self.known_types.get(v).map_or(true, |s| s.contains(ValueType::String)) {
} else {
DatomsTable::AllDatoms DatomsTable::AllDatoms
} else {
DatomsTable::Datoms
} }
} }
&PatternValuePlace::Constant(NonIntegerConstant::Text(_)) => &PatternValuePlace::Constant(NonIntegerConstant::Text(_)) =>
@ -905,8 +900,8 @@ impl ConjoiningClauses {
// the variable could take, then we know we're empty. // the variable could take, then we know we're empty.
empty_because = Some(EmptyBecause::TypeMismatch { empty_because = Some(EmptyBecause::TypeMismatch {
var: var.clone(), var: var.clone(),
existing: already_known.clone(), existing: *already_known,
desired: types.clone(), desired: *types,
}); });
break; break;
} }
@ -973,6 +968,26 @@ impl ConjoiningClauses {
} }
impl ConjoiningClauses { impl ConjoiningClauses {
pub fn apply_clauses(&mut self, schema: &Schema, where_clauses: Vec<WhereClause>) -> Result<()> {
let mut deferred = Vec::with_capacity(where_clauses.len());
// We apply (top level) type predicates first as an optimization.
for c in where_clauses {
match &c {
&WhereClause::Pred(ref p) => {
if let Some(ty) = parse_type_predicate(p.operator.0.as_str()) {
self.apply_type_requirement(p, ty)?;
}
},
_ => {}
};
deferred.push(c);
}
// Then we apply everything else.
for c in deferred {
self.apply_clause(schema, c)?;
}
Ok(())
}
// This is here, rather than in `lib.rs`, because it's recursive: `or` can contain `or`, // This is here, rather than in `lib.rs`, because it's recursive: `or` can contain `or`,
// and so on. // and so on.
pub fn apply_clause(&mut self, schema: &Schema, where_clause: WhereClause) -> Result<()> { pub fn apply_clause(&mut self, schema: &Schema, where_clause: WhereClause) -> Result<()> {

View file

@ -49,9 +49,7 @@ impl ConjoiningClauses {
} }
} }
for clause in not_join.clauses.into_iter() { template.apply_clauses(&schema, not_join.clauses)?;
template.apply_clause(&schema, clause)?;
}
if template.is_known_empty() { if template.is_known_empty() {
return Ok(()); return Ok(());

View file

@ -96,9 +96,7 @@ impl ConjoiningClauses {
// [:find ?x :where (or (and [?x _ 5] [?x :foo/bar 7]))] // [:find ?x :where (or (and [?x _ 5] [?x :foo/bar 7]))]
// which is equivalent to dropping the `or` _and_ the `and`! // which is equivalent to dropping the `or` _and_ the `and`!
OrWhereClause::And(clauses) => { OrWhereClause::And(clauses) => {
for clause in clauses { self.apply_clauses(schema, clauses)?;
self.apply_clause(schema, clause)?;
}
Ok(()) Ok(())
}, },
} }
@ -564,9 +562,7 @@ impl ConjoiningClauses {
let mut receptacle = template.make_receptacle(); let mut receptacle = template.make_receptacle();
match clause { match clause {
OrWhereClause::And(clauses) => { OrWhereClause::And(clauses) => {
for clause in clauses { receptacle.apply_clauses(&schema, clauses)?;
receptacle.apply_clause(&schema, clause)?;
}
}, },
OrWhereClause::Clause(clause) => { OrWhereClause::Clause(clause) => {
receptacle.apply_clause(&schema, clause)?; receptacle.apply_clause(&schema, clause)?;

View file

@ -201,7 +201,7 @@ impl ConjoiningClauses {
} else { } else {
// It must be a keyword. // It must be a keyword.
self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, TypedValue::Keyword(kw.clone())); self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, TypedValue::Keyword(kw.clone()));
self.wheres.add_intersection(ColumnConstraint::has_type(col.clone(), ValueType::Keyword)); self.wheres.add_intersection(ColumnConstraint::has_unit_type(col.clone(), ValueType::Keyword));
}; };
}, },
PatternValuePlace::Constant(ref c) => { PatternValuePlace::Constant(ref c) => {
@ -237,7 +237,8 @@ impl ConjoiningClauses {
// Because everything we handle here is unambiguous, we generate a single type // Because everything we handle here is unambiguous, we generate a single type
// restriction from the value type of the typed value. // restriction from the value type of the typed value.
if value_type.is_none() { if value_type.is_none() {
self.wheres.add_intersection(ColumnConstraint::has_type(col.clone(), typed_value_type)); self.wheres.add_intersection(
ColumnConstraint::has_unit_type(col.clone(), typed_value_type));
} }
}, },
} }
@ -445,7 +446,7 @@ mod testing {
// TODO: implement expand_type_tags. // TODO: implement expand_type_tags.
assert_eq!(cc.wheres, vec![ assert_eq!(cc.wheres, vec![
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))),
ColumnConstraint::has_type("datoms00".to_string(), ValueType::Boolean), ColumnConstraint::has_unit_type("datoms00".to_string(), ValueType::Boolean),
].into()); ].into());
} }
@ -589,7 +590,7 @@ mod testing {
// TODO: implement expand_type_tags. // TODO: implement expand_type_tags.
assert_eq!(cc.wheres, vec![ assert_eq!(cc.wheres, vec![
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String(Rc::new("hello".to_string())))), ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String(Rc::new("hello".to_string())))),
ColumnConstraint::has_type("all_datoms00".to_string(), ValueType::String), ColumnConstraint::has_unit_type("all_datoms00".to_string(), ValueType::String),
].into()); ].into());
} }

View file

@ -34,7 +34,7 @@ use types::{
Inequality, Inequality,
}; };
fn value_type_function_name(s: &str) -> Option<ValueType> { pub fn parse_type_predicate(s: &str) -> Option<ValueType> {
match s { match s {
"ref" => Some(ValueType::Ref), "ref" => Some(ValueType::Ref),
"boolean" => Some(ValueType::Boolean), "boolean" => Some(ValueType::Boolean),
@ -62,8 +62,8 @@ impl ConjoiningClauses {
// and ultimately allowing user-specified predicates, we match on the predicate name first. // and ultimately allowing user-specified predicates, we match on the predicate name first.
if let Some(op) = Inequality::from_datalog_operator(predicate.operator.0.as_str()) { if let Some(op) = Inequality::from_datalog_operator(predicate.operator.0.as_str()) {
self.apply_inequality(schema, op, predicate) self.apply_inequality(schema, op, predicate)
} else if let Some(ty) = value_type_function_name(predicate.operator.0.as_str()) { } else if let Some(ty) = parse_type_predicate(predicate.operator.0.as_str()) {
self.apply_type_requirement(predicate, ty) self.apply_type_requirement(&predicate, ty)
} else { } else {
bail!(ErrorKind::UnknownFunction(predicate.operator.clone())) bail!(ErrorKind::UnknownFunction(predicate.operator.clone()))
} }
@ -76,14 +76,13 @@ impl ConjoiningClauses {
} }
} }
pub fn apply_type_requirement(&mut self, pred: Predicate, ty: ValueType) -> Result<()> { pub fn apply_type_requirement(&mut self, pred: &Predicate, ty: ValueType) -> Result<()> {
if pred.args.len() != 1 { if pred.args.len() != 1 {
bail!(ErrorKind::InvalidNumberOfArguments(pred.operator.clone(), pred.args.len(), 1)); bail!(ErrorKind::InvalidNumberOfArguments(pred.operator.clone(), pred.args.len(), 1));
} }
let mut args = pred.args.into_iter();
if let FnArg::Variable(v) = args.next().unwrap() { if let &FnArg::Variable(ref v) = &pred.args[0] {
self.add_type_requirement(v, ValueTypeSet::of_one(ty)); self.add_type_requirement(v.clone(), ValueTypeSet::of_one(ty));
Ok(()) Ok(())
} else { } else {
bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0)) bail!(ErrorKind::InvalidArgument(pred.operator.clone(), "variable".into(), 0))

View file

@ -179,10 +179,8 @@ pub fn algebrize_with_inputs(schema: &Schema,
// TODO: integrate default source into pattern processing. // TODO: integrate default source into pattern processing.
// TODO: flesh out the rest of find-into-context. // TODO: flesh out the rest of find-into-context.
let where_clauses = parsed.where_clauses; cc.apply_clauses(schema, parsed.where_clauses)?;
for where_clause in where_clauses {
cc.apply_clause(schema, where_clause)?;
}
cc.expand_column_bindings(); cc.expand_column_bindings();
cc.prune_extracted_types(); cc.prune_extracted_types();
cc.process_required_types()?; cc.process_required_types()?;

View file

@ -344,7 +344,7 @@ pub enum ColumnConstraint {
} }
impl ColumnConstraint { impl ColumnConstraint {
pub fn has_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint { pub fn has_unit_type(value: TableAlias, value_type: ValueType) -> ColumnConstraint {
ColumnConstraint::HasTypes { ColumnConstraint::HasTypes {
value, value,
value_types: ValueTypeSet::of_one(value_type), value_types: ValueTypeSet::of_one(value_type),

View file

@ -293,10 +293,10 @@ fn test_type_required_long() {
let query = r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#; let query = r#"[:find ?x :where [?x _ ?e] [(long ?e)]]"#;
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` \
FROM `all_datoms` AS `all_datoms00` \ FROM `datoms` AS `datoms00` \
WHERE ((`all_datoms00`.value_type_tag = 5 AND \ WHERE ((`datoms00`.value_type_tag = 5 AND \
typeof(`all_datoms00`.v) = 'integer'))"); typeof(`datoms00`.v) = 'integer'))");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -308,10 +308,10 @@ fn test_type_required_double() {
let query = r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#; let query = r#"[:find ?x :where [?x _ ?e] [(double ?e)]]"#;
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` \
FROM `all_datoms` AS `all_datoms00` \ FROM `datoms` AS `datoms00` \
WHERE ((`all_datoms00`.value_type_tag = 5 AND \ WHERE ((`datoms00`.value_type_tag = 5 AND \
typeof(`all_datoms00`.v) = 'real'))"); typeof(`datoms00`.v) = 'real'))");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -323,29 +323,21 @@ fn test_type_required_boolean() {
let query = r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#; let query = r#"[:find ?x :where [?x _ ?e] [(boolean ?e)]]"#;
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` \
FROM `all_datoms` AS `all_datoms00` \ FROM `datoms` AS `datoms00` \
WHERE (`all_datoms00`.value_type_tag = 1)"); WHERE (`datoms00`.value_type_tag = 1)");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
#[test] #[test]
fn test_type_require_avoids_all_datoms() { fn test_type_required_string() {
let schema = Schema::default(); let schema = Schema::default();
// Since the constraint is first, we know we don't need to use all_datoms.
let query = r#"[:find ?x :where [(keyword ?e)] [?x _ ?e]]"#; let query = r#"[:find ?x :where [?x _ ?e] [(string ?e)]]"#;
let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` \
FROM `datoms` AS `datoms00` \
WHERE (`datoms00`.value_type_tag = 13)");
assert_eq!(args, vec![]);
// Strings always need to use all_datoms.
let query = r#"[:find ?x :where [(string ?e)] [?x _ ?e]]"#;
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
// Note: strings should use `all_datoms` and not `datoms`.
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \ assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` \
FROM `all_datoms` AS `all_datoms00` \ FROM `all_datoms` AS `all_datoms00` \
WHERE (`all_datoms00`.value_type_tag = 10)"); WHERE (`all_datoms00`.value_type_tag = 10)");