Part 6: implement decision tree for processing simple alternation.
This commit is contained in:
parent
74f188df9b
commit
2b2b5cf696
1 changed file with 307 additions and 4 deletions
|
@ -20,12 +20,16 @@ use mentat_core::{
|
||||||
|
|
||||||
use mentat_query::{
|
use mentat_query::{
|
||||||
NonIntegerConstant,
|
NonIntegerConstant,
|
||||||
|
OrJoin,
|
||||||
|
OrWhereClause,
|
||||||
Pattern,
|
Pattern,
|
||||||
PatternValuePlace,
|
PatternValuePlace,
|
||||||
PatternNonValuePlace,
|
PatternNonValuePlace,
|
||||||
PlainSymbol,
|
PlainSymbol,
|
||||||
Predicate,
|
Predicate,
|
||||||
SrcVar,
|
SrcVar,
|
||||||
|
UnifyVars,
|
||||||
|
WhereClause,
|
||||||
};
|
};
|
||||||
|
|
||||||
use clauses::ConjoiningClauses;
|
use clauses::ConjoiningClauses;
|
||||||
|
@ -43,16 +47,12 @@ use types::{
|
||||||
DatomsTable,
|
DatomsTable,
|
||||||
EmptyBecause,
|
EmptyBecause,
|
||||||
NumericComparison,
|
NumericComparison,
|
||||||
OrJoinKind,
|
|
||||||
QualifiedAlias,
|
QualifiedAlias,
|
||||||
QueryValue,
|
QueryValue,
|
||||||
SourceAlias,
|
SourceAlias,
|
||||||
TableAlias,
|
TableAlias,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Return true if both left and right are the same variable or both are non-variable.
|
/// Return true if both left and right are the same variable or both are non-variable.
|
||||||
fn _simply_matches_place(left: &PatternNonValuePlace, right: &PatternNonValuePlace) -> bool {
|
fn _simply_matches_place(left: &PatternNonValuePlace, right: &PatternNonValuePlace) -> bool {
|
||||||
match (left, right) {
|
match (left, right) {
|
||||||
|
@ -78,3 +78,306 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla
|
||||||
_ => true,
|
_ => true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub enum DeconstructedOrJoin {
|
||||||
|
KnownSuccess,
|
||||||
|
KnownEmpty(EmptyBecause),
|
||||||
|
Unit(OrWhereClause),
|
||||||
|
UnitPattern(Pattern),
|
||||||
|
Simple(Vec<Pattern>),
|
||||||
|
Complex(OrJoin),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Application of `or`. Note that this is recursive!
|
||||||
|
impl ConjoiningClauses {
|
||||||
|
fn apply_or_where_clause(&mut self, schema: &Schema, clause: OrWhereClause) -> Result<()> {
|
||||||
|
match clause {
|
||||||
|
OrWhereClause::Clause(clause) => self.apply_clause(schema, clause),
|
||||||
|
|
||||||
|
// A query might be:
|
||||||
|
// [:find ?x :where (or (and [?x _ 5] [?x :foo/bar 7]))]
|
||||||
|
// which is equivalent to dropping the `or` _and_ the `and`!
|
||||||
|
OrWhereClause::And(clauses) => {
|
||||||
|
for clause in clauses {
|
||||||
|
self.apply_clause(schema, clause)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_or_join(&mut self, schema: &Schema, mut or_join: OrJoin) -> Result<()> {
|
||||||
|
// Simple optimization. Empty `or` clauses disappear. Unit `or` clauses
|
||||||
|
// are equivalent to just the inner clause.
|
||||||
|
match or_join.clauses.len() {
|
||||||
|
0 => Ok(()),
|
||||||
|
1 => self.apply_or_where_clause(schema, or_join.clauses.pop().unwrap()),
|
||||||
|
_ => self.apply_non_trivial_or_join(schema, or_join),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find out if the `OrJoin` is simple. A simple `or` is one in
|
||||||
|
/// which:
|
||||||
|
/// - Every arm is a pattern, so that we can use a single table alias for all.
|
||||||
|
/// - Each pattern should run against the same table, for the same reason.
|
||||||
|
/// - Each pattern uses the same variables. (That's checked by validation.)
|
||||||
|
/// - Each pattern has the same shape, so we can extract bindings from the same columns
|
||||||
|
/// regardless of which clause matched.
|
||||||
|
///
|
||||||
|
/// Like this:
|
||||||
|
///
|
||||||
|
/// ```edn
|
||||||
|
/// [:find ?x
|
||||||
|
/// :where (or [?x :foo/knows "John"]
|
||||||
|
/// [?x :foo/parent "Ámbar"]
|
||||||
|
/// [?x :foo/knows "Daphne"])]
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// While we're doing this diagnosis, we'll also find out if:
|
||||||
|
/// - No patterns can match: the enclosing CC is known-empty.
|
||||||
|
/// - Some patterns can't match: they are discarded.
|
||||||
|
/// - Only one pattern can match: the `or` can be simplified away.
|
||||||
|
fn deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin {
|
||||||
|
// If we have explicit non-maximal unify-vars, we *can't* simply run this as a
|
||||||
|
// single pattern --
|
||||||
|
// ```
|
||||||
|
// [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])]
|
||||||
|
// ```
|
||||||
|
// is *not* equivalent to
|
||||||
|
// ```
|
||||||
|
// [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]]
|
||||||
|
// ```
|
||||||
|
if !or_join.is_fully_unified() {
|
||||||
|
// It's complex because we need to make sure that non-unified vars
|
||||||
|
// mentioned in the body of the `or-join` do not unify with variables
|
||||||
|
// outside the `or-join`. We can't naïvely collect clauses into the
|
||||||
|
// same CC. TODO: pay attention to the unify list when generating
|
||||||
|
// constraints. Temporarily shadow variables within each `or` branch.
|
||||||
|
return DeconstructedOrJoin::Complex(or_join);
|
||||||
|
}
|
||||||
|
|
||||||
|
match or_join.clauses.len() {
|
||||||
|
0 => DeconstructedOrJoin::KnownSuccess,
|
||||||
|
|
||||||
|
// It's safe to simply 'leak' the entire clause, because we know every var in it is
|
||||||
|
// supposed to unify with the enclosing form.
|
||||||
|
1 => DeconstructedOrJoin::Unit(or_join.clauses.into_iter().next().unwrap()),
|
||||||
|
_ => self._deconstruct_or_join(schema, or_join),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This helper does the work of taking a known-non-trivial `or` or `or-join`,
|
||||||
|
/// walking the contained patterns to decide whether it can be translated simply
|
||||||
|
/// -- as a collection of constraints on a single table alias -- or if it needs to
|
||||||
|
/// be implemented as a `UNION`.
|
||||||
|
///
|
||||||
|
/// See the description of `deconstruct_or_join` for more details. This method expects
|
||||||
|
/// to be called _only_ by `deconstruct_or_join`.
|
||||||
|
fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin {
|
||||||
|
// Preconditions enforced by `deconstruct_or_join`.
|
||||||
|
assert_eq!(or_join.unify_vars, UnifyVars::Implicit);
|
||||||
|
assert!(or_join.clauses.len() >= 2);
|
||||||
|
|
||||||
|
// We're going to collect into this.
|
||||||
|
// If at any point we hit something that's not a suitable pattern, we'll
|
||||||
|
// reconstruct and return a complex `OrJoin`.
|
||||||
|
let mut patterns: Vec<Pattern> = Vec::with_capacity(or_join.clauses.len());
|
||||||
|
|
||||||
|
// Keep track of the table we need every pattern to use.
|
||||||
|
let mut expected_table: Option<DatomsTable> = None;
|
||||||
|
|
||||||
|
// Technically we might have several reasons, but we take the last -- that is, that's the
|
||||||
|
// reason we don't have at least one pattern!
|
||||||
|
// We'll return this as our reason if no pattern can return results.
|
||||||
|
let mut empty_because: Option<EmptyBecause> = None;
|
||||||
|
|
||||||
|
// Walk each clause in turn, bailing as soon as we know this can't be simple.
|
||||||
|
let mut clauses = or_join.clauses.into_iter();
|
||||||
|
while let Some(clause) = clauses.next() {
|
||||||
|
// If we fail half-way through processing, we want to reconstitute the input.
|
||||||
|
// Keep a handle to the clause itself here to smooth over the moved `if let` below.
|
||||||
|
let last: OrWhereClause;
|
||||||
|
|
||||||
|
if let OrWhereClause::Clause(WhereClause::Pattern(p)) = clause {
|
||||||
|
// Compute the table for the pattern. If we can't figure one out, it means
|
||||||
|
// the pattern cannot succeed; we drop it.
|
||||||
|
// Inside an `or` it's not a failure for a pattern to be unable to match, which
|
||||||
|
// manifests as a table being unable to be found.
|
||||||
|
let table = self.table_for_places(schema, &p.attribute, &p.value);
|
||||||
|
match table {
|
||||||
|
Err(e) => {
|
||||||
|
empty_because = Some(e);
|
||||||
|
|
||||||
|
// Do not accumulate this pattern at all. Add lightness!
|
||||||
|
continue;
|
||||||
|
},
|
||||||
|
Ok(table) => {
|
||||||
|
// Check the shape of the pattern against a previous pattern.
|
||||||
|
let same_shape =
|
||||||
|
if let Some(template) = patterns.get(0) {
|
||||||
|
template.source == p.source && // or-arms all use the same source anyway.
|
||||||
|
_simply_matches_place(&template.entity, &p.entity) &&
|
||||||
|
_simply_matches_place(&template.attribute, &p.attribute) &&
|
||||||
|
_simply_matches_value_place(&template.value, &p.value) &&
|
||||||
|
_simply_matches_place(&template.tx, &p.tx)
|
||||||
|
} else {
|
||||||
|
// No previous pattern.
|
||||||
|
true
|
||||||
|
};
|
||||||
|
|
||||||
|
// All of our clauses that _do_ yield a table -- that are possible --
|
||||||
|
// must use the same table in order for this to be a simple `or`!
|
||||||
|
if same_shape {
|
||||||
|
if expected_table == Some(table) {
|
||||||
|
patterns.push(p);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if expected_table.is_none() {
|
||||||
|
expected_table = Some(table);
|
||||||
|
patterns.push(p);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, we need to keep this pattern so we can reconstitute.
|
||||||
|
// We'll fall through to reconstruction.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
last = OrWhereClause::Clause(WhereClause::Pattern(p));
|
||||||
|
} else {
|
||||||
|
last = clause;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we get here, it means one of our checks above failed. Reconstruct and bail.
|
||||||
|
let reconstructed: Vec<OrWhereClause> =
|
||||||
|
// Non-empty patterns already collected…
|
||||||
|
patterns.into_iter()
|
||||||
|
.map(|p| OrWhereClause::Clause(WhereClause::Pattern(p)))
|
||||||
|
// … then the clause we just considered…
|
||||||
|
.chain(::std::iter::once(last))
|
||||||
|
// … then the rest of the iterator.
|
||||||
|
.chain(clauses)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
return DeconstructedOrJoin::Complex(OrJoin {
|
||||||
|
unify_vars: UnifyVars::Implicit,
|
||||||
|
clauses: reconstructed,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we got here without returning, then `patterns` is what we're working with.
|
||||||
|
// If `patterns` is empty, it means _none_ of the clauses in the `or` could succeed.
|
||||||
|
match patterns.len() {
|
||||||
|
0 => {
|
||||||
|
assert!(empty_because.is_some());
|
||||||
|
DeconstructedOrJoin::KnownEmpty(empty_because.unwrap())
|
||||||
|
},
|
||||||
|
1 => DeconstructedOrJoin::UnitPattern(patterns.pop().unwrap()),
|
||||||
|
_ => DeconstructedOrJoin::Simple(patterns),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Only call this with an `or_join` with 2 or more patterns.
|
||||||
|
fn apply_non_trivial_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> {
|
||||||
|
assert!(or_join.clauses.len() >= 2);
|
||||||
|
|
||||||
|
match self.deconstruct_or_join(schema, or_join) {
|
||||||
|
DeconstructedOrJoin::KnownSuccess => {
|
||||||
|
// The pattern came to us empty -- `(or)`. Do nothing.
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
DeconstructedOrJoin::KnownEmpty(reason) => {
|
||||||
|
// There were no arms of the join that could be mapped to a table.
|
||||||
|
// The entire `or`, and thus the CC, cannot yield results.
|
||||||
|
self.mark_known_empty(reason);
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
DeconstructedOrJoin::Unit(clause) => {
|
||||||
|
// There was only one clause. We're unifying all variables, so we can just apply here.
|
||||||
|
self.apply_or_where_clause(schema, clause)
|
||||||
|
},
|
||||||
|
DeconstructedOrJoin::UnitPattern(pattern) => {
|
||||||
|
// Same, but simpler.
|
||||||
|
self.apply_pattern(schema, pattern);
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
DeconstructedOrJoin::Simple(patterns) => {
|
||||||
|
// Hooray! Fully unified and plain ol' patterns that all use the same table.
|
||||||
|
// Go right ahead and produce a set of constraint alternations that we can collect,
|
||||||
|
// using a single table alias.
|
||||||
|
// TODO
|
||||||
|
self.apply_simple_or_join(schema, patterns)
|
||||||
|
},
|
||||||
|
DeconstructedOrJoin::Complex(_) => {
|
||||||
|
// Do this the hard way. TODO
|
||||||
|
unimplemented!();
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// A simple `or` join is effectively a single pattern in which an individual column's bindings
|
||||||
|
/// are not a single value. Rather than a pattern like
|
||||||
|
///
|
||||||
|
/// ```edn
|
||||||
|
/// [?x :foo/knows "John"]
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// we have
|
||||||
|
///
|
||||||
|
/// ```edn
|
||||||
|
/// (or [?x :foo/knows "John"]
|
||||||
|
/// [?x :foo/hates "Peter"])
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// but the generated SQL is very similar: the former is
|
||||||
|
///
|
||||||
|
/// ```sql
|
||||||
|
/// WHERE datoms00.a = 99 AND datoms00.v = 'John'
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// with the latter growing to
|
||||||
|
///
|
||||||
|
/// ```sql
|
||||||
|
/// WHERE (datoms00.a = 99 AND datoms00.v = 'John')
|
||||||
|
/// OR (datoms00.a = 98 AND datoms00.v = 'Peter')
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec<Pattern>) -> Result<()> {
|
||||||
|
assert!(patterns.len() >= 2);
|
||||||
|
|
||||||
|
// Each constant attribute might _expand_ the set of possible types of the value-place
|
||||||
|
// variable. We thus generate a set of possible types, and we intersect it with the
|
||||||
|
// types already possible in the CC. If the resultant set is empty, the pattern cannot match.
|
||||||
|
// If the final set isn't unit, we must project a type tag column.
|
||||||
|
// If one of the alternations requires a type that is impossible in the CC, then we can
|
||||||
|
// discard that alternate:
|
||||||
|
//
|
||||||
|
// ```edn
|
||||||
|
// [:find ?x
|
||||||
|
// :where [?a :some/int ?x]
|
||||||
|
// (or [_ :some/otherint ?x]
|
||||||
|
// [_ :some/string ?x])]
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// can simplify to
|
||||||
|
//
|
||||||
|
// ```edn
|
||||||
|
// [:find ?x
|
||||||
|
// :where [?a :some/int ?x]
|
||||||
|
// [_ :some/otherint ?x]]
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// Similarly, if the value place is constant, it must be of a type that doesn't determine
|
||||||
|
// a different table for any of the patterns.
|
||||||
|
// TODO
|
||||||
|
|
||||||
|
// Begin by building a base CC that we'll use to produce constraints from each pattern.
|
||||||
|
// Populate this base CC with whatever variables are already known from the CC to which
|
||||||
|
// we're applying this `or`.
|
||||||
|
// This will give us any applicable type constraints or column mappings.
|
||||||
|
// Then generate a single table alias, based on the first pattern, and use that to make any
|
||||||
|
// new variable mappings we will need to extract values.
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue