Part 3: finish expansion and translation of complex or.

This commit turns complex `or` -- `or`s in which not all variables are
unified, or in which not all arms are the same shape -- into a
computed table.

We do this by building a template CC that shares some state with the
destination CC, applying each arm of the `or` to a copy of the template
as if it were a standalone query, then building a projection list and
creating a `ComputedTable::Union`. This is pushed into the destination
CC's `computed_tables` list.

Finally, the variables projected from the UNION are bound in the
destination CC, so that unification occurs, and projection of the
outermost query can use bindings established by the `or-join`.

This commit includes projection of type codes from heterogeneous `UNION`
arms: we compute a list of variables for which a definite type is
unknown in at least one arm, and force all arms to project either a type
tag column or a fixed type. It's important that each branch of a UNION
project the same columns in the same order, hence the projection of
fixed values.

The translator is similarly extended to project the type tag column name
or the known value_type_tag to support this.

Review comment: clarify union type extraction.
This commit is contained in:
Richard Newman 2017-04-11 10:31:31 -07:00
parent 08d2c613a4
commit d8075aa07d
4 changed files with 345 additions and 20 deletions

View file

@ -36,8 +36,12 @@ use types::{
ColumnConstraintOrAlternation,
ColumnAlternation,
ColumnIntersection,
ComputedTable,
DatomsTable,
EmptyBecause,
QualifiedAlias,
SourceAlias,
VariableColumn,
};
/// Return true if both left and right are the same variable or both are non-variable.
@ -66,6 +70,18 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla
}
}
/// Helper trait for a collection of `ComputedTable`s: store a new table and get back a
/// `DatomsTable` value that refers to it.
trait PushComputed {
/// Takes ownership of `item`, adds it to `self`, and returns the `DatomsTable` by which the
/// stored item can subsequently be named (e.g., when allocating an alias for it).
fn push_computed(&mut self, item: ComputedTable) -> DatomsTable;
}
impl PushComputed for Vec<ComputedTable> {
    /// Appends `item` to the vector and returns a `DatomsTable::Computed` carrying the
    /// position at which it was stored.
    fn push_computed(&mut self, item: ComputedTable) -> DatomsTable {
        // The new element lands at the current length, so capture that before pushing.
        let table = DatomsTable::Computed(self.len());
        self.push(item);
        table
    }
}
pub enum DeconstructedOrJoin {
KnownSuccess,
KnownEmpty(EmptyBecause),
@ -176,6 +192,8 @@ impl ConjoiningClauses {
/// to be called _only_ by `deconstruct_or_join`.
fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin {
// Preconditions enforced by `deconstruct_or_join`.
// Note that a fully unified explicit `or-join` can arrive here, and might leave as
// an implicit `or`.
assert!(or_join.is_fully_unified());
assert!(or_join.clauses.len() >= 2);
@ -193,7 +211,7 @@ impl ConjoiningClauses {
let mut empty_because: Option<EmptyBecause> = None;
// Walk each clause in turn, bailing as soon as we know this can't be simple.
let (join_clauses, mentioned_vars) = or_join.dismember();
let (join_clauses, _unify_vars, mentioned_vars) = or_join.dismember();
let mut clauses = join_clauses.into_iter();
while let Some(clause) = clauses.next() {
// If we fail half-way through processing, we want to reconstitute the input.
@ -306,9 +324,9 @@ impl ConjoiningClauses {
// using a single table alias.
self.apply_simple_or_join(schema, patterns, mentioned_vars)
},
DeconstructedOrJoin::Complex(_) => {
// Do this the hard way. TODO
unimplemented!();
DeconstructedOrJoin::Complex(or_join) => {
// Do this the hard way.
self.apply_complex_or_join(schema, or_join)
},
}
}
@ -341,7 +359,11 @@ impl ConjoiningClauses {
/// OR (datoms00.a = 98 AND datoms00.v = 'Peter')
/// ```
///
fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec<Pattern>, mentioned_vars: BTreeSet<Variable>) -> Result<()> {
fn apply_simple_or_join(&mut self,
schema: &Schema,
patterns: Vec<Pattern>,
mentioned_vars: BTreeSet<Variable>)
-> Result<()> {
if self.is_known_empty() {
return Ok(())
}
@ -481,6 +503,175 @@ impl ConjoiningClauses {
self.narrow_types(cc.known_types);
Ok(())
}
/// Apply a provided `or` or `or-join` to this `ConjoiningClauses`. If you're calling this
/// rather than another `or`-applier, it's assumed that the contents of the `or` are relatively
/// complex: perhaps its arms consist of more than just patterns, or perhaps each arm includes
/// different variables in different places.
///
/// Step one (not yet implemented): any clauses that are standalone patterns might differ only
/// in attribute. In that case, we can treat them as a 'simple or' -- a single pattern with a
/// WHERE clause that alternates on the attribute. Pull those out first.
///
/// Step two: for each cluster of patterns, and for each `and`, recursively build a CC and
/// simple projection. The projection must be the same for each CC, because we will concatenate
/// these with a `UNION`. This is one reason why we require each pattern in the `or` to unify
/// the same variables!
///
/// Finally, we alias this entire UNION block as a FROM; it can be stitched into the outer query
/// by looking at the projection.
///
/// For example,
///
/// ```edn
/// [:find ?page :in $ ?string :where
///  (or [?page :page/title ?string]
///      [?page :page/excerpt ?string]
///      (and [?save :save/string ?string]
///           [?page :page/save ?save]))]
/// ```
///
/// would expand to something like
///
/// ```sql
/// SELECT or123.page AS page FROM
///  (SELECT datoms124.e AS page FROM datoms AS datoms124
///   WHERE datoms124.v = ? AND
///         (datoms124.a = :page/title OR
///          datoms124.a = :page/excerpt)
///   UNION
///   SELECT datoms126.e AS page FROM datoms AS datoms125, datoms AS datoms126
///   WHERE datoms125.a = :save/string AND
///         datoms125.v = ? AND
///         datoms126.v = datoms125.e AND
///         datoms126.a = :page/save)
///  AS or123
/// ```
///
/// Note that a top-level standalone `or` doesn't really need to be aliased, but
/// it shouldn't do any harm.
///
/// # Errors
///
/// Returns the first error produced while applying any clause of any arm to its
/// per-arm CC; in that case nothing is pushed into this CC.
///
/// # Panics
///
/// Panics if every arm is dropped as known-empty but none of them recorded a reason.
fn apply_complex_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> {
    // N.B., a solitary pattern here *cannot* be simply applied to the enclosing CC. We don't
    // want to join all the vars, and indeed if it were safe to do so, we wouldn't have ended up
    // in this function!
    let (join_clauses, unify_vars, mentioned_vars) = or_join.dismember();

    // An implicit `or` unifies (and so projects) every variable it mentions; an explicit
    // `or-join` projects only the variables it names.
    let projected = match unify_vars {
        UnifyVars::Implicit => mentioned_vars.into_iter().collect(),
        UnifyVars::Explicit(vs) => vs.into_iter().collect(),
    };

    let template = self.use_as_template(&projected);

    let mut acc = Vec::with_capacity(join_clauses.len());
    let mut empty_because: Option<EmptyBecause> = None;

    // Apply each arm to its own fresh copy of the template, as if it were a standalone query.
    for clause in join_clauses {
        let mut receptacle = template.make_receptacle();
        match clause {
            OrWhereClause::And(clauses) => {
                for clause in clauses {
                    receptacle.apply_clause(&schema, clause)?;
                }
            },
            OrWhereClause::Clause(clause) => {
                receptacle.apply_clause(&schema, clause)?;
            },
        }
        if receptacle.is_known_empty() {
            // An arm that can't match contributes nothing to the UNION. Remember why, in
            // case every arm turns out to be empty.
            empty_because = receptacle.empty_because;
        } else {
            receptacle.expand_column_bindings();
            receptacle.prune_extracted_types();
            acc.push(receptacle);
        }
    }

    if acc.is_empty() {
        self.mark_known_empty(empty_because.expect("empty for a reason"));
        return Ok(());
    }

    // TODO: optimize the case of a single element in `acc`?

    // Now `acc` contains a sequence of CCs that were all prepared with the same types,
    // each ready to project the same variables.
    // At this point we can lift out any common type information (and even constraints) to the
    // destination CC.
    // We must also contribute type extraction information for any variables that aren't
    // concretely typed for all union arms.
    //
    // We walk the list of variables to unify -- which will become our projection
    // list -- to find out its type info in each CC. We might:
    //
    // 1. Know the type concretely from the enclosing CC. Don't project a type tag from the
    //    union. Example:
    //    ```
    //    [:find ?x ?y
    //     :where [?x :foo/int ?y]
    //            (or [(< ?y 10)]
    //                [_ :foo/verified ?y])]
    //    ```
    // 2. Not know the type, but every CC bound it to the same single type. Don't project a type
    //    tag; we simply contribute the single type to the enclosing CC. Example:
    //    ```
    //    [:find ?x ?y
    //     :where (or [?x :foo/length ?y]
    //                [?x :foo/width ?y])]
    //    ```
    // 3. (a) Have every CC come up with a non-unit type set for the var. Every CC will project
    //        a type tag column from one of its internal bindings, and the union will project it
    //        onwards. Example:
    //    ```
    //    [:find ?x ?y ?z
    //     :where [?x :foo/knows ?y]
    //            (or [?x _ ?z]
    //                [?y _ ?z])]
    //    ```
    // 3. (b) Have some or all CCs come up with a unit type set. Every CC will project a type
    //        tag column, and those with a unit type set will project a fixed constant value.
    //        Again, the union will pass this on.
    //    ```
    //    [:find ?x ?y
    //     :where (or [?x :foo/length ?y]
    //                [?x _ ?y])]
    //    ```
    //
    // NOTE(review): nothing in this function itself narrows the destination CC's known types
    // from the arms (case 2), nor checks that arms which each know a type agree on it --
    // confirm whether `bind_column_to_var` or a later pass covers that.
    let projection: BTreeSet<Variable> = projected.into_iter().collect();
    let mut type_needed: BTreeSet<Variable> = BTreeSet::default();

    // For any variable which has an imprecise type anywhere in the UNION, add it to the
    // set that needs type extraction. All UNION arms must project the same columns.
    for var in projection.iter() {
        if acc.iter().any(|cc| cc.known_type(var).is_none()) {
            type_needed.insert(var.clone());
        }
    }

    // Hang on to these so we can stuff them in our column bindings: the sets themselves
    // are about to be moved into the computed table.
    let var_associations: Vec<Variable> = projection.iter().cloned().collect();
    let type_associations: Vec<Variable> = type_needed.iter().cloned().collect();

    let union = ComputedTable::Union {
        projection: projection,
        type_extraction: type_needed,
        arms: acc,
    };
    let table = self.computed_tables.push_computed(union);
    let alias = self.next_alias_for_table(table);

    // Stitch the computed table into column_bindings, so we get cross-linking.
    for var in var_associations {
        self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(var.clone()), var);
    }
    // Variables whose type isn't pinned down in every arm get their type tag from the
    // union's projected type tag column.
    for var in type_associations {
        self.extracted_types.insert(var.clone(), QualifiedAlias::new(alias.clone(), VariableColumn::VariableTypeTag(var)));
    }
    self.from.push(SourceAlias(table, alias));
    Ok(())
}
}
#[cfg(test)]

View file

@ -20,6 +20,7 @@ use mentat_query_algebrizer::{
ColumnConstraint,
ColumnConstraintOrAlternation,
ColumnIntersection,
ColumnName,
ComputedTable,
ConjoiningClauses,
DatomsColumn,
@ -28,6 +29,7 @@ use mentat_query_algebrizer::{
QueryValue,
SourceAlias,
TableAlias,
VariableColumn,
};
use mentat_query_projector::{
@ -177,19 +179,53 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
projection, type_extraction, arms,
} => {
// The projection list for each CC must have the same shape and the same names.
// The values we project might be fixed or they might be columns, and of course
// each arm will have different columns.
// TODO: type extraction.
let queries = arms.into_iter()
.map(|cc| {
let var_columns = projection.iter().map(|var| {
let col = cc.column_bindings.get(&var).unwrap()[0].clone();
ProjectedColumn(ColumnOrExpression::Column(col), var.to_string())
}).collect();
let projection = Projection::Columns(var_columns);
cc_to_select_query(projection, cc, false, None)
}).collect();
TableOrSubquery::Union(queries, alias)
// The values we project might be fixed or they might be columns.
TableOrSubquery::Union(
arms.into_iter()
.map(|cc| {
// We're going to end up with the variables being projected and also some
// type tag columns.
let mut columns: Vec<ProjectedColumn> = Vec::with_capacity(projection.len() + type_extraction.len());
// For each variable, find out which column it maps to within this arm, and
// project it as the variable name.
// E.g., SELECT datoms03.v AS `?x`.
for var in projection.iter() {
let col = cc.column_bindings.get(&var).unwrap()[0].clone();
let proj = ProjectedColumn(ColumnOrExpression::Column(col), var.to_string());
columns.push(proj);
}
// Similarly, project type tags if they're not known conclusively in the
// outer query.
for var in type_extraction.iter() {
let expression =
if let Some(known) = cc.known_type(var) {
// If we know the type for sure, just project the constant.
// SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag`
ColumnOrExpression::Integer(known.value_type_tag())
} else {
// Otherwise, we'll have an established type binding! This'll be
// either a datoms table or, recursively, a subquery. Project
// this:
// SELECT datoms03.v AS `?x`,
// datoms03.value_type_tag AS `?x_value_type_tag`
let extract = cc.extracted_types
.get(var)
.expect("Expected variable to have a known type or an extracted type");
ColumnOrExpression::Column(extract.clone())
};
let type_column = VariableColumn::VariableTypeTag(var.clone());
let proj = ProjectedColumn(expression, type_column.column_name());
columns.push(proj);
}
// Each arm simply turns into a subquery.
// The SQL translation will stuff "UNION" between each arm.
let projection = Projection::Columns(columns);
cc_to_select_query(projection, cc, false, None)
}).collect(),
alias)
},
}
}
@ -205,6 +241,10 @@ fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: Conjoini
let from = cc.from;
let mut computed: ConsumableVec<_> = cc.computed_tables.into();
// Why do we put computed tables directly into the `FROM` clause? The alternative is to use
// a CTE (`WITH`). They're typically equivalent, but some SQL systems (notably Postgres)
// treat CTEs as optimization barriers, so a `WITH` can be significantly slower. Given that
// this is easy enough to change later, we'll opt for using direct inclusion in `FROM`.
let tables =
from.into_iter().map(|source_alias| {
match source_alias {

View file

@ -259,3 +259,97 @@ fn test_simple_or_join() {
assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1");
assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]);
}
// Translates a query whose explicit `or-join` on `?page` has three arms -- two single
// patterns and one `and` of two patterns -- and checks that the arms become a three-way
// UNION subquery that is joined to the remaining patterns through `?page`.
#[test]
fn test_complex_or_join() {
// Schema: attribute 95 (`:page/save`) is ref-typed; attributes 96 through 99 are all
// string-typed.
let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("page", "save"), 95);
add_attribute(&mut schema, 95, Attribute {
value_type: ValueType::Ref,
..Default::default()
});
associate_ident(&mut schema, NamespacedKeyword::new("save", "title"), 96);
associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97);
associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98);
associate_ident(&mut schema, NamespacedKeyword::new("page", "description"), 99);
for x in 96..100 {
add_attribute(&mut schema, x, Attribute {
value_type: ValueType::String,
..Default::default()
});
}
// Only `?page` is unified with the enclosing query; `?save` stays local to the `and` arm.
let input = r#"[:find [?url ?description]
:where
(or-join [?page]
[?page :page/url "http://foo.com/"]
[?page :page/title "Foo"]
(and
[?page :page/save ?save]
[?save :save/title "Foo"]))
[?page :page/url ?url]
[?page :page/description ?description]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
// Each arm is its own SELECT projecting `?page`; the UNION is aliased `c00` and the outer
// query constrains `datoms04` and `datoms05` against `c00`.`?page`.
assert_eq!(sql, "SELECT `datoms04`.v AS `?url`, \
`datoms05`.v AS `?description` \
FROM (SELECT `datoms00`.e AS `?page` \
FROM `datoms` AS `datoms00` \
WHERE `datoms00`.a = 97 \
AND `datoms00`.v = $v0 \
UNION \
SELECT `datoms01`.e AS `?page` \
FROM `datoms` AS `datoms01` \
WHERE `datoms01`.a = 98 \
AND `datoms01`.v = $v1 \
UNION \
SELECT `datoms02`.e AS `?page` \
FROM `datoms` AS `datoms02`, \
`datoms` AS `datoms03` \
WHERE `datoms02`.a = 95 \
AND `datoms03`.a = 96 \
AND `datoms03`.v = $v2 \
AND `datoms02`.v = `datoms03`.e) AS `c00`, \
`datoms` AS `datoms04`, \
`datoms` AS `datoms05` \
WHERE `datoms04`.a = 97 \
AND `datoms05`.a = 99 \
AND `c00`.`?page` = `datoms04`.e \
AND `c00`.`?page` = `datoms05`.e \
LIMIT 1");
// One bound argument per string constant in the query, in arm order.
assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"),
make_arg("$v1", "Foo"),
make_arg("$v2", "Foo")]);
}
// Translates an `or` whose arms disagree about how much is known of `?y`'s type, and checks
// that both UNION arms project a `?y_value_type_tag` column: a constant where the type is
// known, and the table's `value_type_tag` column where it is not.
#[test]
fn test_complex_or_join_type_projection() {
// Schema: attribute 98 (`:page/title`) is string-typed.
let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98);
add_attribute(&mut schema, 98, Attribute {
value_type: ValueType::String,
..Default::default()
});
// The first arm names the attribute; the second leaves the attribute as a wildcard.
let input = r#"[:find [?y]
:where
(or
[6 :page/title ?y]
[5 _ ?y])]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
// The first arm projects the fixed tag `10` (the value emitted here for the string-typed
// attribute); the second reads from `all_datoms` and projects its `value_type_tag` column.
// The outer query then projects both `?y` and its type tag from the union alias `c00`.
assert_eq!(sql, "SELECT `c00`.`?y` AS `?y`, \
`c00`.`?y_value_type_tag` AS `?y_value_type_tag` \
FROM (SELECT `datoms00`.v AS `?y`, \
10 AS `?y_value_type_tag` \
FROM `datoms` AS `datoms00` \
WHERE `datoms00`.e = 6 \
AND `datoms00`.a = 98 \
UNION \
SELECT `all_datoms01`.v AS `?y`, \
`all_datoms01`.value_type_tag AS `?y_value_type_tag` \
FROM `all_datoms` AS `all_datoms01` \
WHERE `all_datoms01`.e = 5) AS `c00` \
LIMIT 1");
// The entity ids are inlined into the SQL, so there are no bound arguments.
assert_eq!(args, vec![]);
}

View file

@ -670,12 +670,12 @@ impl ContainsVariables for OrJoin {
}
impl OrJoin {
pub fn dismember(self) -> (Vec<OrWhereClause>, BTreeSet<Variable>) {
pub fn dismember(self) -> (Vec<OrWhereClause>, UnifyVars, BTreeSet<Variable>) {
let vars = match self.mentioned_vars {
Some(m) => m,
None => self.collect_mentioned_variables(),
};
(self.clauses, vars)
(self.clauses, self.unify_vars, vars)
}
pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet<Variable> {