From 0639c9446835226a938910fbc5fb8df3b66b55db Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 4 Apr 2017 14:54:08 -0700 Subject: [PATCH] Part 2: implement simple `or`. --- query-algebrizer/src/clauses/mod.rs | 131 ++++-- query-algebrizer/src/clauses/or.rs | 580 +++++++++++++++++++++--- query-algebrizer/src/clauses/pattern.rs | 7 +- query-algebrizer/src/lib.rs | 1 + query-algebrizer/src/types.rs | 20 +- query-parser/src/parse.rs | 52 +-- query-parser/tests/find_tests.rs | 40 +- query-translator/src/translate.rs | 13 - query-translator/tests/translate.rs | 51 ++- query/src/lib.rs | 41 +- 10 files changed, 748 insertions(+), 188 deletions(-) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 846f59e2..8b58526c 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -8,11 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::fmt::{ - Debug, - Formatter, -}; - use std::collections::{ BTreeMap, BTreeSet, @@ -21,6 +16,11 @@ use std::collections::{ use std::collections::btree_map::Entry; +use std::fmt::{ + Debug, + Formatter, +}; + use mentat_core::{ Attribute, Entid, @@ -91,6 +91,33 @@ fn unit_type_set(t: ValueType) -> HashSet { s } +trait Contains { + fn when_contains T>(&self, k: &K, f: F) -> Option; +} + +trait Intersection { + fn with_intersected_keys(&self, ks: &BTreeSet) -> Self; +} + +impl Contains for BTreeSet { + fn when_contains T>(&self, k: &K, f: F) -> Option { + if self.contains(k) { + Some(f()) + } else { + None + } + } +} + +impl Intersection for BTreeMap { + /// Return a clone of the map with only keys that are present in `ks`. + fn with_intersected_keys(&self, ks: &BTreeSet) -> Self { + self.iter() + .filter_map(|(k, v)| ks.when_contains(k, || (k.clone(), v.clone()))) + .collect() + } +} + /// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`. /// The topmost form in a query is a `ConjoiningClauses`. /// @@ -191,6 +218,38 @@ impl Default for ConjoiningClauses { } } +/// Cloning. +impl ConjoiningClauses { + fn make_receptacle(&self) -> ConjoiningClauses { + let mut concrete = ConjoiningClauses::default(); + concrete.is_known_empty = self.is_known_empty; + concrete.empty_because = self.empty_because.clone(); + + concrete.input_variables = self.input_variables.clone(); + concrete.value_bindings = self.value_bindings.clone(); + concrete.known_types = self.known_types.clone(); + concrete.extracted_types = self.extracted_types.clone(); + + concrete + } + + /// Make a new CC populated with the relevant variable associations in this CC. + /// Note that the CC's table aliaser is not yet usable. That's not a problem for templating for + /// simple `or`. + fn use_as_template(&self, vars: &BTreeSet) -> ConjoiningClauses { + let mut template = ConjoiningClauses::default(); + template.is_known_empty = self.is_known_empty; + template.empty_because = self.empty_because.clone(); + + template.input_variables = self.input_variables.intersection(vars).cloned().collect(); + template.value_bindings = self.value_bindings.with_intersected_keys(&vars); + template.known_types = self.known_types.with_intersected_keys(&vars); + template.extracted_types = self.extracted_types.with_intersected_keys(&vars); + + template + } +} + impl ConjoiningClauses { #[allow(dead_code)] fn with_value_bindings(bindings: BTreeMap) -> ConjoiningClauses { @@ -201,7 +260,8 @@ impl ConjoiningClauses { // Pre-fill our type mappings with the types of the input bindings. cc.known_types - .extend(cc.value_bindings.iter() + .extend(cc.value_bindings + .iter() .map(|(k, v)| (k.clone(), unit_type_set(v.value_type())))); cc } @@ -311,18 +371,6 @@ impl ConjoiningClauses { /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting /// type already known. fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { - // If this variable now has a known attribute, we can unhook extracted types for - // any other instances of that variable. - // For example, given - // - // ```edn - // [:find ?v :where [?x ?a ?v] [?y :foo/int ?v]] - // ``` - // - // we will initially choose to extract the type tag for `?v`, but on encountering - // the second pattern we can avoid that. - self.extracted_types.remove(&variable); - // Is there an existing mapping for this variable? // Any known inputs have already been added to known_types, and so if they conflict we'll // spot it here. @@ -336,7 +384,12 @@ impl ConjoiningClauses { /// Like `constrain_var_to_type` but in reverse: this expands the set of types /// with which a variable is associated. - fn broaden_types(&mut self, additional_types: BTreeMap>) { + /// + /// N.B.,: if we ever call `broaden_types` after `is_known_empty` has been set, we might + /// actually move from a state in which a variable can have no type to one that can + /// yield results! We never do so at present -- we carefully set-union types before we + /// set-intersect them -- but this is worth bearing in mind. + pub fn broaden_types(&mut self, additional_types: BTreeMap>) { for (var, new_types) in additional_types { match self.known_types.entry(var) { Entry::Vacant(e) => { @@ -346,12 +399,20 @@ impl ConjoiningClauses { e.insert(new_types); }, Entry::Occupied(mut e) => { + if e.get().is_empty() && self.is_known_empty { + panic!("Uh oh: we failed this pattern, probably because {:?} couldn't match, but now we're broadening its type.", + e.get()); + } e.get_mut().extend(new_types.into_iter()); }, } } } + /// Restrict the known types for `var` to intersect with `types`. + /// If no types are already known -- `var` could have any type -- then this is equivalent to + /// simply setting the known types to `types`. + /// If the known types don't intersect with `types`, mark the pattern as known-empty. fn narrow_types_for_var(&mut self, var: Variable, types: HashSet) { if types.is_empty() { // We hope this never occurs; we should catch this case earlier. @@ -359,10 +420,7 @@ impl ConjoiningClauses { return; } - if types.len() == 1 { - self.extracted_types.remove(&var); - } - + // We can't mutate `empty_because` while we're working with the `Entry`, so do this instead. let mut empty_because: Option = None; match self.known_types.entry(var) { Entry::Vacant(e) => { @@ -372,15 +430,14 @@ impl ConjoiningClauses { // TODO: we shouldn't need to clone here. let intersected: HashSet<_> = types.intersection(e.get()).cloned().collect(); if intersected.is_empty() { - empty_because = Some(EmptyBecause::TypeMismatch(e.key().clone(), - e.get().clone(), - types.iter() - .next() - .cloned() - .unwrap())); - } else { - e.insert(intersected); + let mismatching_type = types.iter().next().unwrap().clone(); + let reason = EmptyBecause::TypeMismatch(e.key().clone(), + e.get().clone(), + mismatching_type); + empty_because = Some(reason); } + // Always insert, even if it's empty! + e.insert(intersected); }, } @@ -389,15 +446,14 @@ impl ConjoiningClauses { } } - fn narrow_types(&mut self, additional_types: BTreeMap>) { + /// Restrict the sets of types for the provided vars to the provided types. + /// See `narrow_types_for_var`. + pub fn narrow_types(&mut self, additional_types: BTreeMap>) { if additional_types.is_empty() { return; } for (var, new_types) in additional_types { self.narrow_types_for_var(var, new_types); - if self.is_known_empty { - return; - } } } @@ -628,9 +684,8 @@ impl ConjoiningClauses { self.apply_predicate(schema, p) }, WhereClause::OrJoin(o) => { - validate_or_join(&o) - //?; - //self.apply_or_join(schema, o) + validate_or_join(&o)?; + self.apply_or_join(schema, o) }, _ => unimplemented!(), } diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index d8e6b1fe..09bacc48 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -8,27 +8,21 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -// WIP -#![allow(dead_code, unused_imports, unused_variables)] +use std::collections::btree_map::Entry; +use std::collections::BTreeSet; use mentat_core::{ - Entid, Schema, - TypedValue, - ValueType, }; use mentat_query::{ - NonIntegerConstant, OrJoin, OrWhereClause, Pattern, PatternValuePlace, PatternNonValuePlace, - PlainSymbol, - Predicate, - SrcVar, UnifyVars, + Variable, WhereClause, }; @@ -36,21 +30,14 @@ use clauses::ConjoiningClauses; use errors::{ Result, - Error, - ErrorKind, }; use types::{ - ColumnConstraint, + ColumnConstraintOrAlternation, + ColumnAlternation, ColumnIntersection, - DatomsColumn, DatomsTable, EmptyBecause, - NumericComparison, - QualifiedAlias, - QueryValue, - SourceAlias, - TableAlias, }; /// Return true if both left and right are the same variable or both are non-variable. @@ -84,7 +71,7 @@ pub enum DeconstructedOrJoin { KnownEmpty(EmptyBecause), Unit(OrWhereClause), UnitPattern(Pattern), - Simple(Vec), + Simple(Vec, BTreeSet), Complex(OrJoin), } @@ -106,12 +93,26 @@ impl ConjoiningClauses { } } - fn apply_or_join(&mut self, schema: &Schema, mut or_join: OrJoin) -> Result<()> { + pub fn apply_or_join(&mut self, schema: &Schema, mut or_join: OrJoin) -> Result<()> { // Simple optimization. Empty `or` clauses disappear. Unit `or` clauses // are equivalent to just the inner clause. + + // Pre-cache mentioned variables. We use these in a few places. + or_join.mentioned_variables(); + match or_join.clauses.len() { 0 => Ok(()), - 1 => self.apply_or_where_clause(schema, or_join.clauses.pop().unwrap()), + 1 if or_join.is_fully_unified() => { + let clause = or_join.clauses.pop().expect("there's a clause"); + self.apply_or_where_clause(schema, clause) + }, + // Either there's only one clause pattern, and it's not fully unified, or we + // have multiple clauses. + // In the former case we can't just apply it: it includes a variable that we don't want + // to join with the rest of the query. + // Notably, this clause might be an `and`, making this a complex pattern, so we can't + // necessarily rewrite it in place. + // In the latter case, we still need to do a bit more work. _ => self.apply_non_trivial_or_join(schema, or_join), } } @@ -175,7 +176,7 @@ impl ConjoiningClauses { /// to be called _only_ by `deconstruct_or_join`. fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin { // Preconditions enforced by `deconstruct_or_join`. - assert_eq!(or_join.unify_vars, UnifyVars::Implicit); + assert!(or_join.is_fully_unified()); assert!(or_join.clauses.len() >= 2); // We're going to collect into this. @@ -192,7 +193,8 @@ impl ConjoiningClauses { let mut empty_because: Option = None; // Walk each clause in turn, bailing as soon as we know this can't be simple. - let mut clauses = or_join.clauses.into_iter(); + let (join_clauses, mentioned_vars) = or_join.dismember(); + let mut clauses = join_clauses.into_iter(); while let Some(clause) = clauses.next() { // If we fail half-way through processing, we want to reconstitute the input. // Keep a handle to the clause itself here to smooth over the moved `if let` below. @@ -259,10 +261,10 @@ impl ConjoiningClauses { .chain(clauses) .collect(); - return DeconstructedOrJoin::Complex(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: reconstructed, - }); + return DeconstructedOrJoin::Complex(OrJoin::new( + UnifyVars::Implicit, + reconstructed, + )); } // If we got here without returning, then `patterns` is what we're working with. @@ -273,14 +275,11 @@ impl ConjoiningClauses { DeconstructedOrJoin::KnownEmpty(empty_because.unwrap()) }, 1 => DeconstructedOrJoin::UnitPattern(patterns.pop().unwrap()), - _ => DeconstructedOrJoin::Simple(patterns), + _ => DeconstructedOrJoin::Simple(patterns, mentioned_vars), } } - /// Only call this with an `or_join` with 2 or more patterns. fn apply_non_trivial_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> { - assert!(or_join.clauses.len() >= 2); - match self.deconstruct_or_join(schema, or_join) { DeconstructedOrJoin::KnownSuccess => { // The pattern came to us empty -- `(or)`. Do nothing. @@ -301,12 +300,11 @@ impl ConjoiningClauses { self.apply_pattern(schema, pattern); Ok(()) }, - DeconstructedOrJoin::Simple(patterns) => { + DeconstructedOrJoin::Simple(patterns, mentioned_vars) => { // Hooray! Fully unified and plain ol' patterns that all use the same table. // Go right ahead and produce a set of constraint alternations that we can collect, // using a single table alias. - // TODO - self.apply_simple_or_join(schema, patterns) + self.apply_simple_or_join(schema, patterns, mentioned_vars) }, DeconstructedOrJoin::Complex(_) => { // Do this the hard way. TODO @@ -343,34 +341,12 @@ impl ConjoiningClauses { /// OR (datoms00.a = 98 AND datoms00.v = 'Peter') /// ``` /// - fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec) -> Result<()> { - assert!(patterns.len() >= 2); + fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec, mentioned_vars: BTreeSet) -> Result<()> { + if self.is_known_empty { + return Ok(()) + } - // Each constant attribute might _expand_ the set of possible types of the value-place - // variable. We thus generate a set of possible types, and we intersect it with the - // types already possible in the CC. If the resultant set is empty, the pattern cannot match. - // If the final set isn't unit, we must project a type tag column. - // If one of the alternations requires a type that is impossible in the CC, then we can - // discard that alternate: - // - // ```edn - // [:find ?x - // :where [?a :some/int ?x] - // (or [_ :some/otherint ?x] - // [_ :some/string ?x])] - // ``` - // - // can simplify to - // - // ```edn - // [:find ?x - // :where [?a :some/int ?x] - // [_ :some/otherint ?x]] - // ``` - // - // Similarly, if the value place is constant, it must be of a type that doesn't determine - // a different table for any of the patterns. - // TODO + assert!(patterns.len() >= 2); // Begin by building a base CC that we'll use to produce constraints from each pattern. // Populate this base CC with whatever variables are already known from the CC to which @@ -378,6 +354,488 @@ impl ConjoiningClauses { // This will give us any applicable type constraints or column mappings. // Then generate a single table alias, based on the first pattern, and use that to make any // new variable mappings we will need to extract values. + let template = self.use_as_template(&mentioned_vars); + + // We expect this to always work: if it doesn't, it means we should never have got to this + // point. + let source_alias = self.alias_table(schema, &patterns[0]).expect("couldn't get table"); + + // This is where we'll collect everything we eventually add to the destination CC. + let mut folded = ConjoiningClauses::default(); + + // Scoped borrow of source_alias. + { + // Clone this CC once for each pattern. + // Apply each pattern to its CC with the _same_ table alias. + // Each pattern's derived types are intersected with any type constraints in the + // template, sourced from the destination CC. If a variable cannot satisfy both type + // constraints, the new CC cannot match. This prunes the 'or' arms: + // + // ```edn + // [:find ?x + // :where [?a :some/int ?x] + // (or [_ :some/otherint ?x] + // [_ :some/string ?x])] + // ``` + // + // can simplify to + // + // ```edn + // [:find ?x + // :where [?a :some/int ?x] + // [_ :some/otherint ?x]] + // ``` + let mut receptacles = + patterns.into_iter() + .filter_map(|pattern| { + let mut receptacle = template.make_receptacle(); + println!("Applying pattern with attribute {:?}", pattern.attribute); + receptacle.apply_pattern_clause_for_alias(schema, &pattern, &source_alias); + if receptacle.is_known_empty { + println!("Receptacle is empty."); + let reason = receptacle.empty_because; + None + } else { + Some(receptacle) + } + }) + .peekable(); + + // We need to copy the column bindings from one of the receptacles. Because this is a simple + // or, we know that they're all the same. + // Because we just made an empty template, and created a new alias from the destination CC, + // we know that we can blindly merge: collisions aren't possible. + if let Some(first) = receptacles.peek() { + for (v, cols) in &first.column_bindings { + println!("Adding {:?}: {:?}", v, cols); + match self.column_bindings.entry(v.clone()) { + Entry::Vacant(e) => { + e.insert(cols.clone()); + }, + Entry::Occupied(mut e) => { + e.get_mut().append(&mut cols.clone()); + }, + } + } + } else { + // No non-empty receptacles? The destination CC is known-empty, because or([]) is false. + // TODO: get the reason out. + self.mark_known_empty(EmptyBecause::AttributeLookupFailed); + return Ok(()); + } + + // Otherwise, we fold together the receptacles. + // + // Merge together the constraints from each receptacle. Each bundle of constraints is + // combined into a `ConstraintIntersection`, and the collection of intersections is + // combined into a `ConstraintAlternation`. (As an optimization, this collection can be + // simplified.) + // + // Each receptacle's known types are _unioned_. Strictly speaking this is a weakening: + // we might know that if `?x` is an integer then `?y` is a string, or vice versa, but at + // this point we'll simply state that `?x` and `?y` can both be integers or strings. + + fn vec_for_iterator(iter: &I) -> Vec where I: Iterator { + match iter.size_hint().1 { + None => Vec::new(), + Some(expected) => Vec::with_capacity(expected), + } + } + + let mut alternates: Vec = vec_for_iterator(&receptacles); + for r in receptacles { + folded.broaden_types(r.known_types); + alternates.push(r.wheres); + } + + if alternates.len() == 1 { + // Simplify. + folded.wheres = alternates.pop().unwrap(); + } else { + let alternation = ColumnAlternation(alternates); + let mut container = ColumnIntersection::default(); + container.add(ColumnConstraintOrAlternation::Alternation(alternation)); + folded.wheres = container; + } + } + + // Collect the source alias: we use a single table join to represent the entire `or`. + self.from.push(source_alias); + + // Add in the known types and constraints. + // Each constant attribute might _expand_ the set of possible types of the value-place + // variable. We thus generate a set of possible types, and we intersect it with the + // types already possible in the CC. If the resultant set is empty, the pattern cannot + // match. If the final set isn't unit, we must project a type tag column. + self.intersect(folded) + } + + fn intersect(&mut self, mut cc: ConjoiningClauses) -> Result<()> { + if cc.is_known_empty { + self.is_known_empty = true; + self.empty_because = cc.empty_because; + } + self.wheres.append(&mut cc.wheres); + self.narrow_types(cc.known_types); Ok(()) } } + +#[cfg(test)] +mod testing { + extern crate mentat_query_parser; + + use super::*; + + use mentat_core::{ + Attribute, + TypedValue, + ValueType, + }; + + use mentat_query::{ + NamespacedKeyword, + Variable, + }; + + use self::mentat_query_parser::{ + parse_find_string, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + use types::{ + ColumnConstraint, + DatomsColumn, + DatomsTable, + NumericComparison, + QualifiedAlias, + QueryValue, + SourceAlias, + }; + + use algebrize; + + fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("parse failed"); + algebrize(schema.into(), parsed).expect("algebrize failed").cc + } + + fn compare_ccs(left: ConjoiningClauses, right: ConjoiningClauses) { + assert_eq!(left.wheres, right.wheres); + assert_eq!(left.from, right.from); + } + + fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "knows"), 66); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, 65, Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 66, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 67, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 68, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 69, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema + } + /// Test that if all the attributes in an `or` fail to resolve, the entire thing fails. + #[test] + fn test_schema_based_failure() { + let schema = Schema::default(); + let query = r#" + [:find ?x + :where (or [?x :foo/nope1 "John"] + [?x :foo/nope2 "Ámbar"] + [?x :foo/nope3 "Daphne"])]"#; + let cc = alg(&schema, query); + assert!(cc.is_known_empty); + assert_eq!(cc.empty_because, Some(EmptyBecause::InvalidAttributeIdent(NamespacedKeyword::new("foo", "nope3")))); + } + + /// Test that if only one of the attributes in an `or` resolves, it's equivalent to a simple query. + #[test] + fn test_only_one_arm_succeeds() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where (or [?x :foo/nope "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/nope "Daphne"])]"#; + let cc = alg(&schema, query); + assert!(!cc.is_known_empty); + compare_ccs(cc, alg(&schema, r#"[:find ?x :where [?x :foo/parent "Ámbar"]]"#)); + } + + // Simple alternation. + #[test] + fn test_simple_alternation() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where (or [?x :foo/knows "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let ambar = QueryValue::TypedValue(TypedValue::typed_string("Ámbar")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + assert!(!cc.is_known_empty); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Alternation( + ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), parent)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), ambar))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), daphne))]), + ]))])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0)]); + } + + // Alternation with a pattern. + #[test] + fn test_alternation_with_pattern() { + let schema = prepopulated_schema(); + let query = r#" + [:find [?x ?name] + :where + [?x :foo/name ?name] + (or [?x :foo/knows "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let d0 = "datoms00".to_string(); + let d1 = "datoms01".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); + let name = QueryValue::Entid(65); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let ambar = QueryValue::TypedValue(TypedValue::typed_string("Ámbar")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + assert!(!cc.is_known_empty); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), name.clone())), + ColumnConstraintOrAlternation::Alternation( + ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), ambar))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), daphne))]), + ])), + // The outer pattern joins against the `or`. + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1)]); + } + + // Alternation with a pattern and a predicate. + #[test] + fn test_alternation_with_pattern_and_predicate() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x ?age + :where + [?x :foo/age ?age] + [[< ?age 30]] + (or [?x :foo/knows "John"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let d0 = "datoms00".to_string(); + let d1 = "datoms01".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); + let knows = QueryValue::Entid(66); + let age = QueryValue::Entid(68); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + assert!(!cc.is_known_empty); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), age.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NumericInequality { + operator: NumericComparison::LessThan, + left: QueryValue::Column(d0v.clone()), + right: QueryValue::TypedValue(TypedValue::Long(30)), + }), + ColumnConstraintOrAlternation::Alternation( + ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), daphne))]), + ])), + // The outer pattern joins against the `or`. + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1)]); + } + + // These two are not equivalent: + // [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])] + // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]] + #[test] + #[should_panic(expected = "not yet implemented")] + fn test_unit_or_join_doesnt_flatten() { + let schema = prepopulated_schema(); + let query = r#"[:find ?x + :where [?x :foo/knows ?y] + (or-join [?x] [?x :foo/parent ?y])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let vy = Variable::from_valid_name("?y"); + let d0 = "datoms00".to_string(); + let d1 = "datoms01".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + + assert!(!cc.is_known_empty); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent.clone())), + // The outer pattern joins against the `or` on the entity, but not value -- ?y means + // different things in each place. + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + + // ?y does not have a binding in the `or-join` pattern. + assert_eq!(cc.column_bindings.get(&vy), Some(&vec![d0v])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1)]); + } + + // These two are equivalent: + // [:find ?x :where [?x :foo/bar ?y] (or [?x :foo/baz ?y])] + // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]] + #[test] + fn test_unit_or_does_flatten() { + let schema = prepopulated_schema(); + let or_query = r#"[:find ?x + :where [?x :foo/knows ?y] + (or [?x :foo/parent ?y])]"#; + let flat_query = r#"[:find ?x + :where [?x :foo/knows ?y] + [?x :foo/parent ?y]]"#; + compare_ccs(alg(&schema, or_query), + alg(&schema, flat_query)); + } + + // Elision of `and`. + #[test] + fn test_unit_or_and_does_flatten() { + let schema = prepopulated_schema(); + let or_query = r#"[:find ?x + :where (or (and [?x :foo/parent ?y] + [?x :foo/age 7]))]"#; + let flat_query = r#"[:find ?x + :where [?x :foo/parent ?y] + [?x :foo/age 7]]"#; + compare_ccs(alg(&schema, or_query), + alg(&schema, flat_query)); + } + + // Alternation with `and`. + /// [:find ?x + /// :where (or (and [?x :foo/knows "John"] + /// [?x :foo/parent "Ámbar"]) + /// [?x :foo/knows "Daphne"])] + /// Strictly speaking this can be implemented with a `NOT EXISTS` clause for the second pattern, + /// but that would be a fair amount of analysis work, I think. + #[test] + #[should_panic(expected = "not yet implemented")] + #[allow(dead_code, unused_variables)] + fn test_alternation_with_and() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where (or (and [?x :foo/knows "John"] + [?x :foo/parent "Ámbar"]) + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + } + + #[test] + fn test_type_based_or_pruning() { + let schema = prepopulated_schema(); + // This simplifies to: + // [:find ?x + // :where [?a :some/int ?x] + // [_ :some/otherint ?x]] + let query = r#" + [:find ?x + :where [?a :foo/age ?x] + (or [_ :foo/height ?x] + [_ :foo/name ?x])]"#; + let simple = r#" + [:find ?x + :where [?a :foo/age ?x] + [_ :foo/height ?x]]"#; + compare_ccs(alg(&schema, query), alg(&schema, simple)); + } +} \ No newline at end of file diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index cede4ef1..a72140e1 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -8,8 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::rc::Rc; - use mentat_core::{ Schema, TypedValue, @@ -70,7 +68,9 @@ impl ConjoiningClauses { /// /// - A unique-valued attribute can sometimes be rewritten into an /// existence subquery instead of a join. - fn apply_pattern_clause_for_alias<'s>(&mut self, schema: &'s Schema, pattern: &Pattern, alias: &SourceAlias) { + /// + /// This method is only public for use from `or.rs`. + pub fn apply_pattern_clause_for_alias<'s>(&mut self, schema: &'s Schema, pattern: &Pattern, alias: &SourceAlias) { if self.is_known_empty { return; } @@ -268,6 +268,7 @@ mod testing { use super::*; use std::collections::BTreeMap; + use std::rc::Rc; use mentat_core::attribute::Unique; use mentat_core::{ diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index a0e90db3..3cf044e9 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -77,6 +77,7 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { for where_clause in where_clauses { cc.apply_clause(schema, where_clause)?; } + cc.expand_column_bindings(); let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None }; Ok(AlgebraicQuery { diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 121e5f85..8d98ddae 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -100,7 +100,7 @@ impl QualifiedAlias { } } -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone)] pub enum QueryValue { Column(QualifiedAlias), Entid(Entid), @@ -233,16 +233,28 @@ impl IntoIterator for ColumnIntersection { } impl ColumnIntersection { + #[inline] pub fn len(&self) -> usize { self.0.len() } + #[inline] pub fn is_empty(&self) -> bool { self.0.is_empty() } + #[inline] + pub fn add(&mut self, constraint: ColumnConstraintOrAlternation) { + self.0.push(constraint); + } + + #[inline] pub fn add_intersection(&mut self, constraint: ColumnConstraint) { - self.0.push(ColumnConstraintOrAlternation::Constraint(constraint)); + self.add(ColumnConstraintOrAlternation::Constraint(constraint)); + } + + pub fn append(&mut self, other: &mut Self) { + self.0.append(&mut other.0) } } @@ -301,6 +313,7 @@ impl Debug for ColumnConstraint { pub enum EmptyBecause { // Var, existing, desired. TypeMismatch(Variable, HashSet, ValueType), + NoValidTypes(Variable), NonNumericArgument, NonStringFulltextValue, UnresolvedIdent(NamespacedKeyword), @@ -319,6 +332,9 @@ impl Debug for EmptyBecause { write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", var, desired, existing) }, + &NoValidTypes(ref var) => { + write!(f, "Type mismatch: {:?} has no valid types", var) + }, &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index f5b6a8d9..892b3f63 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -160,11 +160,7 @@ def_parser!(Where, or_clause, WhereClause, { .of_exactly(Where::or() .with(many1(Where::or_where_clause())) .map(|clauses| { - WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: clauses, - }) + WhereClause::OrJoin(OrJoin::new(UnifyVars::Implicit, clauses)) })) }); @@ -174,11 +170,7 @@ def_parser!(Where, or_join_clause, WhereClause, { .with(Where::rule_vars()) .and(many1(Where::or_where_clause())) .map(|(vars, clauses)| { - WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Explicit(vars), - clauses: clauses, - }) + WhereClause::OrJoin(OrJoin::new(UnifyVars::Explicit(vars), clauses)) })) }); @@ -508,17 +500,15 @@ mod test { edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_clause, input, WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![OrWhereClause::Clause( - WhereClause::Pattern(Pattern { - source: None, - entity: PatternNonValuePlace::Variable(variable(e)), - attribute: PatternNonValuePlace::Variable(variable(a)), - value: PatternValuePlace::Variable(variable(v)), - tx: PatternNonValuePlace::Placeholder, - }))], - })); + OrJoin::new(UnifyVars::Implicit, + vec![OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(variable(e)), + attribute: PatternNonValuePlace::Variable(variable(a)), + value: PatternValuePlace::Variable(variable(v)), + tx: PatternNonValuePlace::Placeholder, + }))]))); } #[test] @@ -535,17 +525,15 @@ mod test { edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_join_clause, input, WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Explicit(vec![variable(e.clone())]), - clauses: vec![OrWhereClause::Clause( - WhereClause::Pattern(Pattern { - source: None, - entity: PatternNonValuePlace::Variable(variable(e)), - attribute: PatternNonValuePlace::Variable(variable(a)), - value: PatternValuePlace::Variable(variable(v)), - tx: PatternNonValuePlace::Placeholder, - }))], - })); + OrJoin::new(UnifyVars::Explicit(vec![variable(e.clone())]), + vec![OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(variable(e)), + attribute: PatternNonValuePlace::Variable(variable(a)), + value: PatternValuePlace::Variable(variable(v)), + tx: PatternNonValuePlace::Placeholder, + }))]))); } #[test] diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs index 36a6064b..03db6bd0 100644 --- a/query-parser/tests/find_tests.rs +++ b/query-parser/tests/find_tests.rs @@ -70,9 +70,9 @@ fn can_parse_simple_or() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Implicit, + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -90,7 +90,7 @@ fn can_parse_simple_or() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), ]); } @@ -103,9 +103,9 @@ fn can_parse_unit_or_join() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -115,7 +115,7 @@ fn can_parse_unit_or_join() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), ]); } @@ -128,9 +128,9 @@ fn can_parse_simple_or_join() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -148,7 +148,7 @@ fn can_parse_simple_or_join() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), ]); } @@ -166,9 +166,9 @@ fn can_parse_simple_or_and_join() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Implicit, + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -179,9 +179,9 @@ fn can_parse_simple_or_and_join() { })), OrWhereClause::And( vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Implicit, + vec![ OrWhereClause::Clause(WhereClause::Pattern(Pattern { source: None, entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")), @@ -197,7 +197,7 @@ fn can_parse_simple_or_and_join() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), WhereClause::Pred(Predicate { operator: PlainSymbol::new("<"), args: vec![ FnArg::Variable(Variable::from_valid_name("?y")), FnArg::EntidOrInteger(1), @@ -205,6 +205,6 @@ fn can_parse_simple_or_and_join() { ], ) ], - }), + )), ]); } diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index e6b4b375..3f50af04 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -8,21 +8,12 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -#![allow(dead_code, unused_imports)] - use mentat_core::{ SQLValueType, TypedValue, ValueType, }; -use mentat_query::{ - Element, - FindSpec, - PlainSymbol, - Variable, -}; - use mentat_query_algebrizer::{ AlgebraicQuery, ColumnAlternation, @@ -31,10 +22,8 @@ use mentat_query_algebrizer::{ ColumnIntersection, ConjoiningClauses, DatomsColumn, - DatomsTable, QualifiedAlias, QueryValue, - SourceAlias, }; use mentat_query_projector::{ @@ -47,10 +36,8 @@ use mentat_query_sql::{ ColumnOrExpression, Constraint, FromClause, - Name, Op, Projection, - ProjectedColumn, SelectQuery, TableList, }; diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 29ad6c75..ca967cfa 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -51,16 +51,20 @@ fn translate>>(schema: &Schema, input: &'static str, limit: select.query.to_sql_query().unwrap() } -fn prepopulated_schema() -> Schema { +fn prepopulated_typed_schema(foo_type: ValueType) -> Schema { let mut schema = Schema::default(); associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::String, + value_type: foo_type, ..Default::default() }); schema } +fn prepopulated_schema() -> Schema { + prepopulated_typed_schema(ValueType::String) +} + fn make_arg(name: &'static str, value: &'static str) -> (String, Rc) { (name.to_string(), Rc::new(value.to_string())) } @@ -215,13 +219,7 @@ fn test_numeric_less_than_unknown_attribute() { #[test] fn test_numeric_gte_known_attribute() { - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Double, - ..Default::default() - }); - + let schema = prepopulated_typed_schema(ValueType::Double); let input = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9"); @@ -230,15 +228,34 @@ fn test_numeric_gte_known_attribute() { #[test] fn test_numeric_not_equals_known_attribute() { - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Long, - ..Default::default() - }); - + let schema = prepopulated_typed_schema(ValueType::Long); let input = r#"[:find ?x . :where [?x :foo/bar ?y] [(!= ?y 12)]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1"); assert_eq!(args, vec![]); -} \ No newline at end of file +} + +#[test] +fn test_simple_or_join() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + associate_ident(&mut schema, NamespacedKeyword::new("page", "description"), 99); + for x in 97..100 { + add_attribute(&mut schema, x, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + } + + let input = r#"[:find [?url ?description] + :where + (or-join [?page] + [?page :page/url "http://foo.com/"] + [?page :page/title "Foo"]) + [?page :page/url ?url] + [?page :page/description ?description]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index b9037117..5c724c0f 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -568,6 +568,9 @@ impl OrWhereClause { pub struct OrJoin { pub unify_vars: UnifyVars, pub clauses: Vec, + + /// Caches the result of `collect_mentioned_variables`. + mentioned_vars: Option>, } #[allow(dead_code)] @@ -595,6 +598,14 @@ pub struct FindQuery { } impl OrJoin { + pub fn new(unify_vars: UnifyVars, clauses: Vec) -> OrJoin { + OrJoin { + unify_vars: unify_vars, + clauses: clauses, + mentioned_vars: None, + } + } + /// Return true if either the `OrJoin` is `UnifyVars::Implicit`, or if /// every variable mentioned inside the join is also mentioned in the `UnifyVars` list. pub fn is_fully_unified(&self) -> bool { @@ -605,8 +616,12 @@ impl OrJoin { // it would have failed validation. That allows us to simply compare counts here. // TODO: in debug mode, do a full intersection, and verify that our count check // returns the same results. - let mentioned = self.collect_mentioned_variables(); - vars.len() == mentioned.len() + // Use the cached list if we have one. + if let Some(ref mentioned) = self.mentioned_vars { + vars.len() == mentioned.len() + } else { + vars.len() == self.collect_mentioned_variables().len() + } } } } @@ -654,6 +669,28 @@ impl ContainsVariables for OrJoin { } } +impl OrJoin { + pub fn dismember(self) -> (Vec, BTreeSet) { + let vars = match self.mentioned_vars { + Some(m) => m, + None => self.collect_mentioned_variables(), + }; + (self.clauses, vars) + } + + pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet { + if self.mentioned_vars.is_none() { + let m = self.collect_mentioned_variables(); + self.mentioned_vars = Some(m); + } + if let Some(ref mentioned) = self.mentioned_vars { + mentioned + } else { + panic!() + } + } +} + impl ContainsVariables for Predicate { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { for arg in &self.args {