diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index 8fcb9c9f..d9d59fac 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -36,8 +36,12 @@ use types::{ ColumnConstraintOrAlternation, ColumnAlternation, ColumnIntersection, + ComputedTable, DatomsTable, EmptyBecause, + QualifiedAlias, + SourceAlias, + VariableColumn, }; /// Return true if both left and right are the same variable or both are non-variable. @@ -66,6 +70,18 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla } } +trait PushComputed { + fn push_computed(&mut self, item: ComputedTable) -> DatomsTable; +} + +impl PushComputed for Vec { + fn push_computed(&mut self, item: ComputedTable) -> DatomsTable { + let next_index = self.len(); + self.push(item); + DatomsTable::Computed(next_index) + } +} + pub enum DeconstructedOrJoin { KnownSuccess, KnownEmpty(EmptyBecause), @@ -176,6 +192,8 @@ impl ConjoiningClauses { /// to be called _only_ by `deconstruct_or_join`. fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin { // Preconditions enforced by `deconstruct_or_join`. + // Note that a fully unified explicit `or-join` can arrive here, and might leave as + // an implicit `or`. assert!(or_join.is_fully_unified()); assert!(or_join.clauses.len() >= 2); @@ -193,7 +211,7 @@ impl ConjoiningClauses { let mut empty_because: Option = None; // Walk each clause in turn, bailing as soon as we know this can't be simple. - let (join_clauses, mentioned_vars) = or_join.dismember(); + let (join_clauses, _unify_vars, mentioned_vars) = or_join.dismember(); let mut clauses = join_clauses.into_iter(); while let Some(clause) = clauses.next() { // If we fail half-way through processing, we want to reconstitute the input. @@ -306,9 +324,9 @@ impl ConjoiningClauses { // using a single table alias. 
self.apply_simple_or_join(schema, patterns, mentioned_vars) }, - DeconstructedOrJoin::Complex(_) => { - // Do this the hard way. TODO - unimplemented!(); + DeconstructedOrJoin::Complex(or_join) => { + // Do this the hard way. + self.apply_complex_or_join(schema, or_join) }, } } @@ -341,7 +359,11 @@ impl ConjoiningClauses { /// OR (datoms00.a = 98 AND datoms00.v = 'Peter') /// ``` /// - fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec, mentioned_vars: BTreeSet) -> Result<()> { + fn apply_simple_or_join(&mut self, + schema: &Schema, + patterns: Vec, + mentioned_vars: BTreeSet) + -> Result<()> { if self.is_known_empty() { return Ok(()) } @@ -481,6 +503,175 @@ impl ConjoiningClauses { self.narrow_types(cc.known_types); Ok(()) } + + /// Apply a provided `or` or `or-join` to this `ConjoiningClauses`. If you're calling this + /// rather than another `or`-applier, it's assumed that the contents of the `or` are relatively + /// complex: perhaps its arms consist of more than just patterns, or perhaps each arm includes + /// different variables in different places. + /// + /// Step one (not yet implemented): any clauses that are standalone patterns might differ only + /// in attribute. In that case, we can treat them as a 'simple or' -- a single pattern with a + /// WHERE clause that alternates on the attribute. Pull those out first. + /// + /// Step two: for each cluster of patterns, and for each `and`, recursively build a CC and + /// simple projection. The projection must be the same for each CC, because we will concatenate + /// these with a `UNION`. This is one reason why we require each pattern in the `or` to unify + /// the same variables! + /// + /// Finally, we alias this entire UNION block as a FROM; it can be stitched into the outer query + /// by looking at the projection. 
+ /// + /// For example, + /// + /// ```edn + /// [:find ?page :in $ ?string :where + /// (or [?page :page/title ?string] + /// [?page :page/excerpt ?string] + /// (and [?save :save/string ?string] + /// [?page :page/save ?save]))] + /// ``` + /// + /// would expand to something like + /// + /// ```sql + /// SELECT or123.page AS page FROM + /// (SELECT datoms124.e AS page FROM datoms AS datoms124 + /// WHERE datoms124.v = ? AND + /// (datoms124.a = :page/title OR + /// datoms124.a = :page/excerpt) + /// UNION + /// SELECT datoms126.e AS page FROM datoms AS datoms125, datoms AS datoms126 + /// WHERE datoms125.a = :save/string AND + /// datoms125.v = ? AND + /// datoms126.v = datoms125.e AND + /// datoms126.a = :page/save) + /// AS or123 + /// ``` + /// + /// Note that a top-level standalone `or` doesn't really need to be aliased, but + /// it shouldn't do any harm. + fn apply_complex_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> { + // N.B., a solitary pattern here *cannot* be simply applied to the enclosing CC. We don't + // want to join all the vars, and indeed if it were safe to do so, we wouldn't have ended up + // in this function! 
+ let (join_clauses, unify_vars, mentioned_vars) = or_join.dismember(); + let projected = match unify_vars { + UnifyVars::Implicit => mentioned_vars.into_iter().collect(), + UnifyVars::Explicit(vs) => vs.into_iter().collect(), + }; + + let template = self.use_as_template(&projected); + + let mut acc = Vec::with_capacity(join_clauses.len()); + let mut empty_because: Option = None; + + for clause in join_clauses.into_iter() { + let mut receptacle = template.make_receptacle(); + match clause { + OrWhereClause::And(clauses) => { + for clause in clauses { + receptacle.apply_clause(&schema, clause)?; + } + }, + OrWhereClause::Clause(clause) => { + receptacle.apply_clause(&schema, clause)?; + }, + } + if receptacle.is_known_empty() { + empty_because = receptacle.empty_because; + } else { + receptacle.expand_column_bindings(); + receptacle.prune_extracted_types(); + acc.push(receptacle); + } + } + + if acc.is_empty() { + self.mark_known_empty(empty_because.expect("empty for a reason")); + return Ok(()); + } + + // TODO: optimize the case of a single element in `acc`? + + // Now `acc` contains a sequence of CCs that were all prepared with the same types, + // each ready to project the same variables. + // At this point we can lift out any common type information (and even constraints) to the + // destination CC. + // We must also contribute type extraction information for any variables that aren't + // concretely typed for all union arms. + // + // We walk the list of variables to unify -- which will become our projection + // list -- to find out its type info in each CC. We might: + // + // 1. Know the type concretely from the enclosing CC. Don't project a type tag from the + // union. Example: + // ``` + // [:find ?x ?y + // :where [?x :foo/int ?y] + // (or [(< ?y 10)] + // [_ :foo/verified ?y])] + // ``` + // 2. Not know the type, but every CC bound it to the same single type. Don't project a type + // tag; we simply contribute the single type to the enclosing CC. 
Example: + // ``` + // [:find ?x ?y + // :where (or [?x :foo/length ?y] + // [?x :foo/width ?y])] + // ``` + // 3. (a) Have every CC come up with a non-unit type set for the var. Every CC will project + // a type tag column from one of its internal bindings, and the union will project it + // onwards. Example: + // ``` + // [:find ?x ?y ?z + // :where [?x :foo/knows ?y] + // (or [?x _ ?z] + // [?y _ ?z])] + // ``` + // 3. (b) Have some or all CCs come up with a unit type set. Every CC will project a type + // tag column, and those with a unit type set will project a fixed constant value. + // Again, the union will pass this on. + // ``` + // [:find ?x ?y + // :where (or [?x :foo/length ?y] + // [?x _ ?y])] + // ``` + let projection: BTreeSet = projected.into_iter().collect(); + let mut type_needed: BTreeSet = BTreeSet::default(); + + // For any variable which has an imprecise type anywhere in the UNION, add it to the + // set that needs type extraction. All UNION arms must project the same columns. + for var in projection.iter() { + if acc.iter().any(|cc| !cc.known_type(var).is_some()) { + type_needed.insert(var.clone()); + } + } + + // Hang on to these so we can stuff them in our column bindings. + let var_associations: Vec; + let type_associations: Vec; + { + var_associations = projection.iter().cloned().collect(); + type_associations = type_needed.iter().cloned().collect(); + } + + let union = ComputedTable::Union { + projection: projection, + type_extraction: type_needed, + arms: acc, + }; + let table = self.computed_tables.push_computed(union); + let alias = self.next_alias_for_table(table); + + // Stitch the computed table into column_bindings, so we get cross-linking. 
+ for var in var_associations.into_iter() { + self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(var.clone()), var); + } + for var in type_associations.into_iter() { + self.extracted_types.insert(var.clone(), QualifiedAlias::new(alias.clone(), VariableColumn::VariableTypeTag(var))); + } + self.from.push(SourceAlias(table, alias)); + Ok(()) + } } #[cfg(test)] diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 203706a9..b90d9083 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -20,6 +20,7 @@ use mentat_query_algebrizer::{ ColumnConstraint, ColumnConstraintOrAlternation, ColumnIntersection, + ColumnName, ComputedTable, ConjoiningClauses, DatomsColumn, @@ -28,6 +29,7 @@ use mentat_query_algebrizer::{ QueryValue, SourceAlias, TableAlias, + VariableColumn, }; use mentat_query_projector::{ @@ -177,19 +179,53 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq projection, type_extraction, arms, } => { // The projection list for each CC must have the same shape and the same names. - // The values we project might be fixed or they might be columns, and of course - // each arm will have different columns. - // TODO: type extraction. - let queries = arms.into_iter() - .map(|cc| { - let var_columns = projection.iter().map(|var| { - let col = cc.column_bindings.get(&var).unwrap()[0].clone(); - ProjectedColumn(ColumnOrExpression::Column(col), var.to_string()) - }).collect(); - let projection = Projection::Columns(var_columns); - cc_to_select_query(projection, cc, false, None) - }).collect(); - TableOrSubquery::Union(queries, alias) + // The values we project might be fixed or they might be columns. + TableOrSubquery::Union( + arms.into_iter() + .map(|cc| { + // We're going to end up with the variables being projected and also some + // type tag columns. 
+ let mut columns: Vec = Vec::with_capacity(projection.len() + type_extraction.len()); + + // For each variable, find out which column it maps to within this arm, and + // project it as the variable name. + // E.g., SELECT datoms03.v AS `?x`. + for var in projection.iter() { + let col = cc.column_bindings.get(&var).unwrap()[0].clone(); + let proj = ProjectedColumn(ColumnOrExpression::Column(col), var.to_string()); + columns.push(proj); + } + + // Similarly, project type tags if they're not known conclusively in the + // outer query. + for var in type_extraction.iter() { + let expression = + if let Some(known) = cc.known_type(var) { + // If we know the type for sure, just project the constant. + // SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag` + ColumnOrExpression::Integer(known.value_type_tag()) + } else { + // Otherwise, we'll have an established type binding! This'll be + // either a datoms table or, recursively, a subquery. Project + // this: + // SELECT datoms03.v AS `?x`, + // datoms03.value_type_tag AS `?x_value_type_tag` + let extract = cc.extracted_types + .get(var) + .expect("Expected variable to have a known type or an extracted type"); + ColumnOrExpression::Column(extract.clone()) + }; + let type_column = VariableColumn::VariableTypeTag(var.clone()); + let proj = ProjectedColumn(expression, type_column.column_name()); + columns.push(proj); + } + + // Each arm simply turns into a subquery. + // The SQL translation will stuff "UNION" between each arm. + let projection = Projection::Columns(columns); + cc_to_select_query(projection, cc, false, None) + }).collect(), + alias) }, } } @@ -205,6 +241,10 @@ fn cc_to_select_query>>(projection: Projection, cc: Conjoini let from = cc.from; let mut computed: ConsumableVec<_> = cc.computed_tables.into(); + // Why do we put computed tables directly into the `FROM` clause? The alternative is to use + // a CTE (`WITH`). 
They're typically equivalent, but some SQL systems (notably Postgres) + // treat CTEs as optimization barriers, so a `WITH` can be significantly slower. Given that + // this is easy enough to change later, we'll opt for using direct inclusion in `FROM`. let tables = from.into_iter().map(|source_alias| { match source_alias { diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index ca967cfa..02445727 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -259,3 +259,97 @@ fn test_simple_or_join() { assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1"); assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]); } + +#[test] +fn test_complex_or_join() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "save"), 95); + add_attribute(&mut schema, 95, Attribute { + value_type: ValueType::Ref, + ..Default::default() + }); + + associate_ident(&mut schema, NamespacedKeyword::new("save", "title"), 96); + associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + associate_ident(&mut schema, NamespacedKeyword::new("page", "description"), 99); + for x in 96..100 { + add_attribute(&mut schema, x, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + } + + let input = r#"[:find [?url ?description] + :where + (or-join [?page] + [?page :page/url "http://foo.com/"] + [?page :page/title "Foo"] + (and + [?page :page/save ?save] + [?save :save/title "Foo"])) + [?page :page/url ?url] + [?page :page/description 
?description]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms04`.v AS `?url`, \ + `datoms05`.v AS `?description` \ + FROM (SELECT `datoms00`.e AS `?page` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 97 \ + AND `datoms00`.v = $v0 \ + UNION \ + SELECT `datoms01`.e AS `?page` \ + FROM `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 98 \ + AND `datoms01`.v = $v1 \ + UNION \ + SELECT `datoms02`.e AS `?page` \ + FROM `datoms` AS `datoms02`, \ + `datoms` AS `datoms03` \ + WHERE `datoms02`.a = 95 \ + AND `datoms03`.a = 96 \ + AND `datoms03`.v = $v2 \ + AND `datoms02`.v = `datoms03`.e) AS `c00`, \ + `datoms` AS `datoms04`, \ + `datoms` AS `datoms05` \ + WHERE `datoms04`.a = 97 \ + AND `datoms05`.a = 99 \ + AND `c00`.`?page` = `datoms04`.e \ + AND `c00`.`?page` = `datoms05`.e \ + LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), + make_arg("$v1", "Foo"), + make_arg("$v2", "Foo")]); +} + + +#[test] +fn test_complex_or_join_type_projection() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + add_attribute(&mut schema, 98, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + + let input = r#"[:find [?y] + :where + (or + [6 :page/title ?y] + [5 _ ?y])]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `c00`.`?y` AS `?y`, \ + `c00`.`?y_value_type_tag` AS `?y_value_type_tag` \ + FROM (SELECT `datoms00`.v AS `?y`, \ + 10 AS `?y_value_type_tag` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.e = 6 \ + AND `datoms00`.a = 98 \ + UNION \ + SELECT `all_datoms01`.v AS `?y`, \ + `all_datoms01`.value_type_tag AS `?y_value_type_tag` \ + FROM `all_datoms` AS `all_datoms01` \ + WHERE `all_datoms01`.e = 5) AS `c00` \ + LIMIT 1"); + assert_eq!(args, vec![]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index 5c724c0f..e348030b 100644 --- a/query/src/lib.rs +++ 
b/query/src/lib.rs @@ -670,12 +670,12 @@ impl ContainsVariables for OrJoin { } impl OrJoin { - pub fn dismember(self) -> (Vec, BTreeSet) { + pub fn dismember(self) -> (Vec, UnifyVars, BTreeSet) { let vars = match self.mentioned_vars { Some(m) => m, None => self.collect_mentioned_variables(), }; - (self.clauses, vars) + (self.clauses, self.unify_vars, vars) } pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet {