diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 1872b8f9..c5b31b1e 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -8,11 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::fmt::{ - Debug, - Formatter, -}; - use std::collections::{ BTreeMap, BTreeSet, @@ -21,6 +16,11 @@ use std::collections::{ use std::collections::btree_map::Entry; +use std::fmt::{ + Debug, + Formatter, +}; + use mentat_core::{ Attribute, Entid, @@ -29,6 +29,8 @@ use mentat_core::{ ValueType, }; +use mentat_core::counter::RcCounter; + use mentat_query::{ NamespacedKeyword, NonIntegerConstant, @@ -73,24 +75,39 @@ impl RcCloned for ::std::rc::Rc { } } -/// A thing that's capable of aliasing a table name for us. -/// This exists so that we can obtain predictable names in tests. -pub type TableAliaser = Box TableAlias>; - -pub fn default_table_aliaser() -> TableAliaser { - let mut i = -1; - Box::new(move |table| { - i += 1; - format!("{}{:02}", table.name(), i) - }) -} - fn unit_type_set(t: ValueType) -> HashSet { let mut s = HashSet::with_capacity(1); s.insert(t); s } +trait Contains { + fn when_contains T>(&self, k: &K, f: F) -> Option; +} + +trait Intersection { + fn with_intersected_keys(&self, ks: &BTreeSet) -> Self; +} + +impl Contains for BTreeSet { + fn when_contains T>(&self, k: &K, f: F) -> Option { + if self.contains(k) { + Some(f()) + } else { + None + } + } +} + +impl Intersection for BTreeMap { + /// Return a clone of the map with only keys that are present in `ks`. + fn with_intersected_keys(&self, ks: &BTreeSet) -> Self { + self.iter() + .filter_map(|(k, v)| ks.when_contains(k, || (k.clone(), v.clone()))) + .collect() + } +} + /// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`. /// The topmost form in a query is a `ConjoiningClauses`. /// @@ -115,12 +132,11 @@ fn unit_type_set(t: ValueType) -> HashSet { /// * Inline expressions? ///--------------------------------------------------------------------------------------- pub struct ConjoiningClauses { - /// `true` if this set of clauses cannot yield results in the context of the current schema. - pub is_known_empty: bool, + /// `Some` if this set of clauses cannot yield results in the context of the current schema. pub empty_because: Option, - /// A function used to generate an alias for a table -- e.g., from "datoms" to "datoms123". - aliaser: TableAliaser, + /// A data source used to generate an alias for a table -- e.g., from "datoms" to "datoms123". + alias_counter: RcCounter, /// A vector of source/alias pairs used to construct a SQL `FROM` list. pub from: Vec, @@ -161,7 +177,7 @@ pub struct ConjoiningClauses { impl Debug for ConjoiningClauses { fn fmt(&self, fmt: &mut Formatter) -> ::std::fmt::Result { fmt.debug_struct("ConjoiningClauses") - .field("is_known_empty", &self.is_known_empty) + .field("empty_because", &self.empty_because) .field("from", &self.from) .field("wheres", &self.wheres) .field("column_bindings", &self.column_bindings) @@ -177,9 +193,8 @@ impl Debug for ConjoiningClauses { impl Default for ConjoiningClauses { fn default() -> ConjoiningClauses { ConjoiningClauses { - is_known_empty: false, empty_because: None, - aliaser: default_table_aliaser(), + alias_counter: RcCounter::new(), from: vec![], wheres: ColumnIntersection::default(), input_variables: BTreeSet::new(), @@ -191,6 +206,36 @@ impl Default for ConjoiningClauses { } } +/// Cloning. +impl ConjoiningClauses { + fn make_receptacle(&self) -> ConjoiningClauses { + let mut concrete = ConjoiningClauses::default(); + concrete.empty_because = self.empty_because.clone(); + + concrete.input_variables = self.input_variables.clone(); + concrete.value_bindings = self.value_bindings.clone(); + concrete.known_types = self.known_types.clone(); + concrete.extracted_types = self.extracted_types.clone(); + + concrete + } + + /// Make a new CC populated with the relevant variable associations in this CC. + /// The CC shares an alias count with all of its copies. + fn use_as_template(&self, vars: &BTreeSet) -> ConjoiningClauses { + let mut template = ConjoiningClauses::default(); + template.alias_counter = self.alias_counter.clone(); // Rc ftw. + template.empty_because = self.empty_because.clone(); + + template.input_variables = self.input_variables.intersection(vars).cloned().collect(); + template.value_bindings = self.value_bindings.with_intersected_keys(&vars); + template.known_types = self.known_types.with_intersected_keys(&vars); + template.extracted_types = self.extracted_types.with_intersected_keys(&vars); + + template + } +} + impl ConjoiningClauses { #[allow(dead_code)] fn with_value_bindings(bindings: BTreeMap) -> ConjoiningClauses { @@ -201,7 +246,8 @@ impl ConjoiningClauses { // Pre-fill our type mappings with the types of the input bindings. cc.known_types - .extend(cc.value_bindings.iter() + .extend(cc.value_bindings + .iter() .map(|(k, v)| (k.clone(), unit_type_set(v.value_type())))); cc } @@ -304,46 +350,13 @@ impl ConjoiningClauses { numeric_types.insert(ValueType::Double); numeric_types.insert(ValueType::Long); - let entry = self.known_types.entry(variable); - match entry { - Entry::Vacant(vacant) => { - vacant.insert(numeric_types); - }, - Entry::Occupied(mut occupied) => { - let narrowed: HashSet = numeric_types.intersection(occupied.get()).cloned().collect(); - match narrowed.len() { - 0 => { - // TODO: can't borrow as mutable more than once! - //self.mark_known_empty(EmptyBecause::TypeMismatch(occupied.key().clone(), occupied.get().clone(), ValueType::Double)); // I know… - }, - 1 => { - // Hooray! - self.extracted_types.remove(occupied.key()); - }, - _ => { - }, - }; - occupied.insert(narrowed); - }, - } + self.narrow_types_for_var(variable, numeric_types); } /// Constrains the var if there's no existing type. /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting /// type already known. fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { - // If this variable now has a known attribute, we can unhook extracted types for - // any other instances of that variable. - // For example, given - // - // ```edn - // [:find ?v :where [?x ?a ?v] [?y :foo/int ?v]] - // ``` - // - // we will initially choose to extract the type tag for `?v`, but on encountering - // the second pattern we can avoid that. - self.extracted_types.remove(&variable); - // Is there an existing mapping for this variable? // Any known inputs have already been added to known_types, and so if they conflict we'll // spot it here. @@ -355,6 +368,81 @@ impl ConjoiningClauses { } } + /// Like `constrain_var_to_type` but in reverse: this expands the set of types + /// with which a variable is associated. + /// + /// N.B.,: if we ever call `broaden_types` after `empty_because` has been set, we might + /// actually move from a state in which a variable can have no type to one that can + /// yield results! We never do so at present -- we carefully set-union types before we + /// set-intersect them -- but this is worth bearing in mind. + pub fn broaden_types(&mut self, additional_types: BTreeMap>) { + for (var, new_types) in additional_types { + match self.known_types.entry(var) { + Entry::Vacant(e) => { + if new_types.len() == 1 { + self.extracted_types.remove(e.key()); + } + e.insert(new_types); + }, + Entry::Occupied(mut e) => { + if e.get().is_empty() && self.empty_because.is_some() { + panic!("Uh oh: we failed this pattern, probably because {:?} couldn't match, but now we're broadening its type.", + e.get()); + } + e.get_mut().extend(new_types.into_iter()); + }, + } + } + } + + /// Restrict the known types for `var` to intersect with `types`. + /// If no types are already known -- `var` could have any type -- then this is equivalent to + /// simply setting the known types to `types`. + /// If the known types don't intersect with `types`, mark the pattern as known-empty. + fn narrow_types_for_var(&mut self, var: Variable, types: HashSet) { + if types.is_empty() { + // We hope this never occurs; we should catch this case earlier. + self.mark_known_empty(EmptyBecause::NoValidTypes(var)); + return; + } + + // We can't mutate `empty_because` while we're working with the `Entry`, so do this instead. + let mut empty_because: Option = None; + match self.known_types.entry(var) { + Entry::Vacant(e) => { + e.insert(types); + }, + Entry::Occupied(mut e) => { + // TODO: we shouldn't need to clone here. + let intersected: HashSet<_> = types.intersection(e.get()).cloned().collect(); + if intersected.is_empty() { + let mismatching_type = types.iter().next().unwrap().clone(); + let reason = EmptyBecause::TypeMismatch(e.key().clone(), + e.get().clone(), + mismatching_type); + empty_because = Some(reason); + } + // Always insert, even if it's empty! + e.insert(intersected); + }, + } + + if let Some(e) = empty_because { + self.mark_known_empty(e); + } + } + + /// Restrict the sets of types for the provided vars to the provided types. + /// See `narrow_types_for_var`. + pub fn narrow_types(&mut self, additional_types: BTreeMap>) { + if additional_types.is_empty() { + return; + } + for (var, new_types) in additional_types { + self.narrow_types_for_var(var, new_types); + } + } + /// Ensure that the given place has the correct types to be a tx-id. /// Right now this is mostly unimplemented: we fail hard if anything but a placeholder is /// present. @@ -376,8 +464,12 @@ impl ConjoiningClauses { } } + #[inline] + pub fn is_known_empty(&self) -> bool { + self.empty_because.is_some() + } + fn mark_known_empty(&mut self, why: EmptyBecause) { - self.is_known_empty = true; if self.empty_because.is_some() { return; } @@ -484,25 +576,43 @@ impl ConjoiningClauses { } } + pub fn next_alias_for_table(&mut self, table: DatomsTable) -> TableAlias { + format!("{}{:02}", table.name(), self.alias_counter.next()) + } + /// Produce a (table, alias) pair to handle the provided pattern. /// This is a mutating method because it mutates the aliaser function! /// Note that if this function decides that a pattern cannot match, it will flip - /// `is_known_empty`. + /// `empty_because`. fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option { self.table_for_places(schema, &pattern.attribute, &pattern.value) .map_err(|reason| { self.mark_known_empty(reason); }) - .map(|table| SourceAlias(table, (self.aliaser)(table))) + .map(|table: DatomsTable| SourceAlias(table, self.next_alias_for_table(table))) .ok() } + fn get_attribute_for_value<'s>(&self, schema: &'s Schema, value: &TypedValue) -> Option<&'s Attribute> { + match value { + &TypedValue::Ref(id) => schema.attribute_for_entid(id), + &TypedValue::Keyword(ref kw) => schema.attribute_for_ident(kw), + _ => None, + } + } + fn get_attribute<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option<&'s Attribute> { match pattern.attribute { PatternNonValuePlace::Entid(id) => schema.attribute_for_entid(id), PatternNonValuePlace::Ident(ref kw) => schema.attribute_for_ident(kw), + PatternNonValuePlace::Variable(ref var) => + // If the pattern has a variable, we've already determined that the binding -- if + // any -- is acceptable and yields a table. Here, simply look to see if it names + // an attribute so we can find out the type. + self.value_bindings.get(var) + .and_then(|val| self.get_attribute_for_value(schema, val)), _ => None, } @@ -546,6 +656,19 @@ impl ConjoiningClauses { } } + /// Eliminate any type extractions for variables whose types are definitely known. + pub fn prune_extracted_types(&mut self) { + if self.extracted_types.is_empty() || self.known_types.is_empty() { + return; + } + for (var, types) in self.known_types.iter() { + if types.len() == 1 { + self.extracted_types.remove(var); + } + } + } + + /// When a CC has accumulated all patterns, generate value_type_tag entries in `wheres` /// to refine value types for which two things are true: /// @@ -582,9 +705,8 @@ impl ConjoiningClauses { self.apply_predicate(schema, p) }, WhereClause::OrJoin(o) => { - validate_or_join(&o) - //?; - //self.apply_or_join(schema, o) + validate_or_join(&o)?; + self.apply_or_join(schema, o) }, _ => unimplemented!(), } @@ -606,4 +728,22 @@ fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { #[cfg(test)] pub fn ident(ns: &str, name: &str) -> PatternNonValuePlace { PatternNonValuePlace::Ident(::std::rc::Rc::new(NamespacedKeyword::new(ns, name))) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Our alias counter is shared between CCs. + #[test] + fn test_aliasing_through_template() { + let mut starter = ConjoiningClauses::default(); + let alias_zero = starter.next_alias_for_table(DatomsTable::Datoms); + let mut first = starter.use_as_template(&BTreeSet::new()); + let mut second = starter.use_as_template(&BTreeSet::new()); + let alias_one = first.next_alias_for_table(DatomsTable::Datoms); + let alias_two = second.next_alias_for_table(DatomsTable::Datoms); + assert!(alias_zero != alias_one); + assert!(alias_one != alias_two); + } } \ No newline at end of file diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index d8e6b1fe..2ad759d6 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -8,27 +8,21 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -// WIP -#![allow(dead_code, unused_imports, unused_variables)] +use std::collections::btree_map::Entry; +use std::collections::BTreeSet; use mentat_core::{ - Entid, Schema, - TypedValue, - ValueType, }; use mentat_query::{ - NonIntegerConstant, OrJoin, OrWhereClause, Pattern, PatternValuePlace, PatternNonValuePlace, - PlainSymbol, - Predicate, - SrcVar, UnifyVars, + Variable, WhereClause, }; @@ -36,21 +30,14 @@ use clauses::ConjoiningClauses; use errors::{ Result, - Error, - ErrorKind, }; use types::{ - ColumnConstraint, + ColumnConstraintOrAlternation, + ColumnAlternation, ColumnIntersection, - DatomsColumn, DatomsTable, EmptyBecause, - NumericComparison, - QualifiedAlias, - QueryValue, - SourceAlias, - TableAlias, }; /// Return true if both left and right are the same variable or both are non-variable. @@ -84,7 +71,7 @@ pub enum DeconstructedOrJoin { KnownEmpty(EmptyBecause), Unit(OrWhereClause), UnitPattern(Pattern), - Simple(Vec), + Simple(Vec, BTreeSet), Complex(OrJoin), } @@ -106,12 +93,26 @@ impl ConjoiningClauses { } } - fn apply_or_join(&mut self, schema: &Schema, mut or_join: OrJoin) -> Result<()> { + pub fn apply_or_join(&mut self, schema: &Schema, mut or_join: OrJoin) -> Result<()> { // Simple optimization. Empty `or` clauses disappear. Unit `or` clauses // are equivalent to just the inner clause. + + // Pre-cache mentioned variables. We use these in a few places. + or_join.mentioned_variables(); + match or_join.clauses.len() { 0 => Ok(()), - 1 => self.apply_or_where_clause(schema, or_join.clauses.pop().unwrap()), + 1 if or_join.is_fully_unified() => { + let clause = or_join.clauses.pop().expect("there's a clause"); + self.apply_or_where_clause(schema, clause) + }, + // Either there's only one clause pattern, and it's not fully unified, or we + // have multiple clauses. + // In the former case we can't just apply it: it includes a variable that we don't want + // to join with the rest of the query. + // Notably, this clause might be an `and`, making this a complex pattern, so we can't + // necessarily rewrite it in place. + // In the latter case, we still need to do a bit more work. _ => self.apply_non_trivial_or_join(schema, or_join), } } @@ -175,7 +176,7 @@ impl ConjoiningClauses { /// to be called _only_ by `deconstruct_or_join`. fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin { // Preconditions enforced by `deconstruct_or_join`. - assert_eq!(or_join.unify_vars, UnifyVars::Implicit); + assert!(or_join.is_fully_unified()); assert!(or_join.clauses.len() >= 2); // We're going to collect into this. @@ -192,7 +193,8 @@ impl ConjoiningClauses { let mut empty_because: Option = None; // Walk each clause in turn, bailing as soon as we know this can't be simple. - let mut clauses = or_join.clauses.into_iter(); + let (join_clauses, mentioned_vars) = or_join.dismember(); + let mut clauses = join_clauses.into_iter(); while let Some(clause) = clauses.next() { // If we fail half-way through processing, we want to reconstitute the input. // Keep a handle to the clause itself here to smooth over the moved `if let` below. @@ -259,10 +261,10 @@ impl ConjoiningClauses { .chain(clauses) .collect(); - return DeconstructedOrJoin::Complex(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: reconstructed, - }); + return DeconstructedOrJoin::Complex(OrJoin::new( + UnifyVars::Implicit, + reconstructed, + )); } // If we got here without returning, then `patterns` is what we're working with. @@ -273,14 +275,11 @@ impl ConjoiningClauses { DeconstructedOrJoin::KnownEmpty(empty_because.unwrap()) }, 1 => DeconstructedOrJoin::UnitPattern(patterns.pop().unwrap()), - _ => DeconstructedOrJoin::Simple(patterns), + _ => DeconstructedOrJoin::Simple(patterns, mentioned_vars), } } - /// Only call this with an `or_join` with 2 or more patterns. fn apply_non_trivial_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> { - assert!(or_join.clauses.len() >= 2); - match self.deconstruct_or_join(schema, or_join) { DeconstructedOrJoin::KnownSuccess => { // The pattern came to us empty -- `(or)`. Do nothing. @@ -301,12 +300,11 @@ impl ConjoiningClauses { self.apply_pattern(schema, pattern); Ok(()) }, - DeconstructedOrJoin::Simple(patterns) => { + DeconstructedOrJoin::Simple(patterns, mentioned_vars) => { // Hooray! Fully unified and plain ol' patterns that all use the same table. // Go right ahead and produce a set of constraint alternations that we can collect, // using a single table alias. - // TODO - self.apply_simple_or_join(schema, patterns) + self.apply_simple_or_join(schema, patterns, mentioned_vars) }, DeconstructedOrJoin::Complex(_) => { // Do this the hard way. TODO @@ -343,34 +341,12 @@ impl ConjoiningClauses { /// OR (datoms00.a = 98 AND datoms00.v = 'Peter') /// ``` /// - fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec) -> Result<()> { - assert!(patterns.len() >= 2); + fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec, mentioned_vars: BTreeSet) -> Result<()> { + if self.is_known_empty() { + return Ok(()) + } - // Each constant attribute might _expand_ the set of possible types of the value-place - // variable. We thus generate a set of possible types, and we intersect it with the - // types already possible in the CC. If the resultant set is empty, the pattern cannot match. - // If the final set isn't unit, we must project a type tag column. - // If one of the alternations requires a type that is impossible in the CC, then we can - // discard that alternate: - // - // ```edn - // [:find ?x - // :where [?a :some/int ?x] - // (or [_ :some/otherint ?x] - // [_ :some/string ?x])] - // ``` - // - // can simplify to - // - // ```edn - // [:find ?x - // :where [?a :some/int ?x] - // [_ :some/otherint ?x]] - // ``` - // - // Similarly, if the value place is constant, it must be of a type that doesn't determine - // a different table for any of the patterns. - // TODO + assert!(patterns.len() >= 2); // Begin by building a base CC that we'll use to produce constraints from each pattern. // Populate this base CC with whatever variables are already known from the CC to which @@ -378,6 +354,490 @@ impl ConjoiningClauses { // This will give us any applicable type constraints or column mappings. // Then generate a single table alias, based on the first pattern, and use that to make any // new variable mappings we will need to extract values. + let template = self.use_as_template(&mentioned_vars); + + // We expect this to always work: if it doesn't, it means we should never have got to this + // point. + let source_alias = self.alias_table(schema, &patterns[0]).expect("couldn't get table"); + + // This is where we'll collect everything we eventually add to the destination CC. + let mut folded = ConjoiningClauses::default(); + + // Scoped borrow of source_alias. + { + // Clone this CC once for each pattern. + // Apply each pattern to its CC with the _same_ table alias. + // Each pattern's derived types are intersected with any type constraints in the + // template, sourced from the destination CC. If a variable cannot satisfy both type + // constraints, the new CC cannot match. This prunes the 'or' arms: + // + // ```edn + // [:find ?x + // :where [?a :some/int ?x] + // (or [_ :some/otherint ?x] + // [_ :some/string ?x])] + // ``` + // + // can simplify to + // + // ```edn + // [:find ?x + // :where [?a :some/int ?x] + // [_ :some/otherint ?x]] + // ``` + let mut receptacles = + patterns.into_iter() + .map(|pattern| { + let mut receptacle = template.make_receptacle(); + println!("Applying pattern with attribute {:?}", pattern.attribute); + receptacle.apply_pattern_clause_for_alias(schema, &pattern, &source_alias); + receptacle + }) + .peekable(); + + // Let's see if we can grab a reason if every pattern failed. + // If every pattern failed, we can just take the first! + let reason = receptacles.peek() + .map(|r| r.empty_because.clone()) + .unwrap_or(None); + + // Filter out empties. + let mut receptacles = receptacles.filter(|receptacle| !receptacle.is_known_empty()) + .peekable(); + + // We need to copy the column bindings from one of the receptacles. Because this is a simple + // or, we know that they're all the same. + // Because we just made an empty template, and created a new alias from the destination CC, + // we know that we can blindly merge: collisions aren't possible. + if let Some(first) = receptacles.peek() { + for (v, cols) in &first.column_bindings { + println!("Adding {:?}: {:?}", v, cols); + match self.column_bindings.entry(v.clone()) { + Entry::Vacant(e) => { + e.insert(cols.clone()); + }, + Entry::Occupied(mut e) => { + e.get_mut().append(&mut cols.clone()); + }, + } + } + } else { + // No non-empty receptacles? The destination CC is known-empty, because or([]) is false. + self.mark_known_empty(reason.unwrap_or(EmptyBecause::AttributeLookupFailed)); + return Ok(()); + } + + // Otherwise, we fold together the receptacles. + // + // Merge together the constraints from each receptacle. Each bundle of constraints is + // combined into a `ConstraintIntersection`, and the collection of intersections is + // combined into a `ConstraintAlternation`. (As an optimization, this collection can be + // simplified.) + // + // Each receptacle's known types are _unioned_. Strictly speaking this is a weakening: + // we might know that if `?x` is an integer then `?y` is a string, or vice versa, but at + // this point we'll simply state that `?x` and `?y` can both be integers or strings. + + fn vec_for_iterator(iter: &I) -> Vec where I: Iterator { + match iter.size_hint().1 { + None => Vec::new(), + Some(expected) => Vec::with_capacity(expected), + } + } + + let mut alternates: Vec = vec_for_iterator(&receptacles); + for r in receptacles { + folded.broaden_types(r.known_types); + alternates.push(r.wheres); + } + + if alternates.len() == 1 { + // Simplify. + folded.wheres = alternates.pop().unwrap(); + } else { + let alternation = ColumnAlternation(alternates); + let mut container = ColumnIntersection::default(); + container.add(ColumnConstraintOrAlternation::Alternation(alternation)); + folded.wheres = container; + } + } + + // Collect the source alias: we use a single table join to represent the entire `or`. + self.from.push(source_alias); + + // Add in the known types and constraints. + // Each constant attribute might _expand_ the set of possible types of the value-place + // variable. We thus generate a set of possible types, and we intersect it with the + // types already possible in the CC. If the resultant set is empty, the pattern cannot + // match. If the final set isn't unit, we must project a type tag column. + self.intersect(folded) + } + + fn intersect(&mut self, mut cc: ConjoiningClauses) -> Result<()> { + if cc.is_known_empty() { + self.empty_because = cc.empty_because; + } + self.wheres.append(&mut cc.wheres); + self.narrow_types(cc.known_types); Ok(()) } } + +#[cfg(test)] +mod testing { + extern crate mentat_query_parser; + + use super::*; + + use mentat_core::{ + Attribute, + TypedValue, + ValueType, + }; + + use mentat_query::{ + NamespacedKeyword, + Variable, + }; + + use self::mentat_query_parser::{ + parse_find_string, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + use types::{ + ColumnConstraint, + DatomsColumn, + DatomsTable, + NumericComparison, + QualifiedAlias, + QueryValue, + SourceAlias, + }; + + use algebrize; + + fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("parse failed"); + algebrize(schema.into(), parsed).expect("algebrize failed").cc + } + + fn compare_ccs(left: ConjoiningClauses, right: ConjoiningClauses) { + assert_eq!(left.wheres, right.wheres); + assert_eq!(left.from, right.from); + } + + fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "knows"), 66); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, 65, Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 66, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 67, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 68, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 69, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema + } + /// Test that if all the attributes in an `or` fail to resolve, the entire thing fails. + #[test] + fn test_schema_based_failure() { + let schema = Schema::default(); + let query = r#" + [:find ?x + :where (or [?x :foo/nope1 "John"] + [?x :foo/nope2 "Ámbar"] + [?x :foo/nope3 "Daphne"])]"#; + let cc = alg(&schema, query); + assert!(cc.is_known_empty()); + assert_eq!(cc.empty_because, Some(EmptyBecause::InvalidAttributeIdent(NamespacedKeyword::new("foo", "nope3")))); + } + + /// Test that if only one of the attributes in an `or` resolves, it's equivalent to a simple query. + #[test] + fn test_only_one_arm_succeeds() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where (or [?x :foo/nope "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/nope "Daphne"])]"#; + let cc = alg(&schema, query); + assert!(!cc.is_known_empty()); + compare_ccs(cc, alg(&schema, r#"[:find ?x :where [?x :foo/parent "Ámbar"]]"#)); + } + + // Simple alternation. + #[test] + fn test_simple_alternation() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where (or [?x :foo/knows "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let ambar = QueryValue::TypedValue(TypedValue::typed_string("Ámbar")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Alternation( + ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), parent)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), ambar))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), daphne))]), + ]))])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0)]); + } + + // Alternation with a pattern. + #[test] + fn test_alternation_with_pattern() { + let schema = prepopulated_schema(); + let query = r#" + [:find [?x ?name] + :where + [?x :foo/name ?name] + (or [?x :foo/knows "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let d0 = "datoms00".to_string(); + let d1 = "datoms01".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); + let name = QueryValue::Entid(65); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let ambar = QueryValue::TypedValue(TypedValue::typed_string("Ámbar")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), name.clone())), + ColumnConstraintOrAlternation::Alternation( + ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), ambar))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), daphne))]), + ])), + // The outer pattern joins against the `or`. + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1)]); + } + + // Alternation with a pattern and a predicate. + #[test] + fn test_alternation_with_pattern_and_predicate() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x ?age + :where + [?x :foo/age ?age] + [[< ?age 30]] + (or [?x :foo/knows "John"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let d0 = "datoms00".to_string(); + let d1 = "datoms01".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); + let knows = QueryValue::Entid(66); + let age = QueryValue::Entid(68); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), age.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NumericInequality { + operator: NumericComparison::LessThan, + left: QueryValue::Column(d0v.clone()), + right: QueryValue::TypedValue(TypedValue::Long(30)), + }), + ColumnConstraintOrAlternation::Alternation( + ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), daphne))]), + ])), + // The outer pattern joins against the `or`. + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1)]); + } + + // These two are not equivalent: + // [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])] + // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]] + #[test] + #[should_panic(expected = "not yet implemented")] + fn test_unit_or_join_doesnt_flatten() { + let schema = prepopulated_schema(); + let query = r#"[:find ?x + :where [?x :foo/knows ?y] + (or-join [?x] [?x :foo/parent ?y])]"#; + let cc = alg(&schema, query); + let vx = Variable::from_valid_name("?x"); + let vy = Variable::from_valid_name("?y"); + let d0 = "datoms00".to_string(); + let d1 = "datoms01".to_string(); + let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent.clone())), + // The outer pattern joins against the `or` on the entity, but not value -- ?y means + // different things in each place. + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + + // ?y does not have a binding in the `or-join` pattern. + assert_eq!(cc.column_bindings.get(&vy), Some(&vec![d0v])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1)]); + } + + // These two are equivalent: + // [:find ?x :where [?x :foo/bar ?y] (or [?x :foo/baz ?y])] + // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]] + #[test] + fn test_unit_or_does_flatten() { + let schema = prepopulated_schema(); + let or_query = r#"[:find ?x + :where [?x :foo/knows ?y] + (or [?x :foo/parent ?y])]"#; + let flat_query = r#"[:find ?x + :where [?x :foo/knows ?y] + [?x :foo/parent ?y]]"#; + compare_ccs(alg(&schema, or_query), + alg(&schema, flat_query)); + } + + // Elision of `and`. + #[test] + fn test_unit_or_and_does_flatten() { + let schema = prepopulated_schema(); + let or_query = r#"[:find ?x + :where (or (and [?x :foo/parent ?y] + [?x :foo/age 7]))]"#; + let flat_query = r#"[:find ?x + :where [?x :foo/parent ?y] + [?x :foo/age 7]]"#; + compare_ccs(alg(&schema, or_query), + alg(&schema, flat_query)); + } + + // Alternation with `and`. + /// [:find ?x + /// :where (or (and [?x :foo/knows "John"] + /// [?x :foo/parent "Ámbar"]) + /// [?x :foo/knows "Daphne"])] + /// Strictly speaking this can be implemented with a `NOT EXISTS` clause for the second pattern, + /// but that would be a fair amount of analysis work, I think. + #[test] + #[should_panic(expected = "not yet implemented")] + #[allow(dead_code, unused_variables)] + fn test_alternation_with_and() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where (or (and [?x :foo/knows "John"] + [?x :foo/parent "Ámbar"]) + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + } + + #[test] + fn test_type_based_or_pruning() { + let schema = prepopulated_schema(); + // This simplifies to: + // [:find ?x + // :where [?a :some/int ?x] + // [_ :some/otherint ?x]] + let query = r#" + [:find ?x + :where [?a :foo/age ?x] + (or [_ :foo/height ?x] + [_ :foo/name ?x])]"#; + let simple = r#" + [:find ?x + :where [?a :foo/age ?x] + [_ :foo/height ?x]]"#; + compare_ccs(alg(&schema, query), alg(&schema, simple)); + } +} \ No newline at end of file diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index cede4ef1..6aadfa52 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -8,8 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::rc::Rc; - use mentat_core::{ Schema, TypedValue, @@ -43,7 +41,7 @@ impl ConjoiningClauses { /// account all information spread across two patterns. /// /// If the constraints cannot be satisfied -- for example, if this pattern includes a numeric - /// attribute and a string value -- then the `is_known_empty` field on the CC is flipped and + /// attribute and a string value -- then the `empty_because` field on the CC is flipped and /// the function returns. /// /// A pattern being impossible to satisfy isn't necessarily a bad thing -- this query might @@ -70,8 +68,10 @@ impl ConjoiningClauses { /// /// - A unique-valued attribute can sometimes be rewritten into an /// existence subquery instead of a join. - fn apply_pattern_clause_for_alias<'s>(&mut self, schema: &'s Schema, pattern: &Pattern, alias: &SourceAlias) { - if self.is_known_empty { + /// + /// This method is only public for use from `or.rs`. + pub fn apply_pattern_clause_for_alias<'s>(&mut self, schema: &'s Schema, pattern: &Pattern, alias: &SourceAlias) { + if self.is_known_empty() { return; } @@ -154,7 +154,7 @@ impl ConjoiningClauses { // Wouldn't it be nice if we didn't need to clone in the found case? // It doesn't matter too much: collisons won't be too frequent. self.constrain_var_to_type(v.clone(), this_type); - if self.is_known_empty { + if self.is_known_empty() { return; } } @@ -265,9 +265,12 @@ impl ConjoiningClauses { #[cfg(test)] mod testing { + extern crate mentat_query_parser; + use super::*; use std::collections::BTreeMap; + use std::rc::Rc; use mentat_core::attribute::Unique; use mentat_core::{ @@ -280,6 +283,10 @@ mod testing { Variable, }; + use self::mentat_query_parser::{ + parse_find_string, + }; + use clauses::{ add_attribute, associate_ident, @@ -295,6 +302,13 @@ mod testing { SourceAlias, }; + use algebrize; + + fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("parse failed"); + algebrize(schema.into(), parsed).expect("algebrize failed").cc + } + #[test] fn test_unknown_ident() { let mut cc = ConjoiningClauses::default(); @@ -308,7 +322,7 @@ mod testing { tx: PatternNonValuePlace::Placeholder, }); - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); } #[test] @@ -326,7 +340,7 @@ mod testing { tx: PatternNonValuePlace::Placeholder, }); - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); } #[test] @@ -356,7 +370,7 @@ mod testing { let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); // After this, we know a lot of things: - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); // ?x must be a ref. @@ -394,7 +408,7 @@ mod testing { let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); // ?x must be a ref. @@ -443,11 +457,15 @@ mod testing { let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + // ?x must be a ref, and ?v a boolean. + assert_eq!(cc.known_type(&x), Some(ValueType::Ref)); + + // We don't need to extract a type for ?v, because the attribute is known. + assert!(!cc.extracted_types.contains_key(&v)); + assert_eq!(cc.known_type(&v), Some(ValueType::Boolean)); // ?x is bound to datoms0.e. assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); @@ -477,7 +495,7 @@ mod testing { tx: PatternNonValuePlace::Placeholder, }); - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(DatomsColumn::Attribute, hello)); } @@ -503,7 +521,7 @@ mod testing { let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); // ?x must be a ref. @@ -534,7 +552,7 @@ mod testing { let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); let d0_v = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Value); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); // ?x must be a ref. @@ -597,7 +615,7 @@ mod testing { let d1_e = QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity); let d1_a = QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![ SourceAlias(DatomsTable::Datoms, "datoms00".to_string()), SourceAlias(DatomsTable::Datoms, "datoms01".to_string()), @@ -697,7 +715,7 @@ mod testing { }); // The type of the provided binding doesn't match the type of the attribute. - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); } #[test] @@ -729,7 +747,7 @@ mod testing { }); // The type of the provided binding doesn't match the type of the attribute. - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); } #[test] @@ -772,13 +790,13 @@ mod testing { // Finally, expand column bindings to get the overlaps for ?x. cc.expand_column_bindings(); - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), EmptyBecause::TypeMismatch(y.clone(), unit_type_set(ValueType::String), ValueType::Boolean)); } #[test] - #[should_panic(expected = "assertion failed: cc.is_known_empty")] + #[should_panic(expected = "assertion failed: cc.is_known_empty()")] /// This test needs range inference in order to succeed: we must deduce that ?y must /// simultaneously be a boolean-valued attribute and a ref-valued attribute, and thus /// the CC can never return results. @@ -810,8 +828,26 @@ mod testing { // Finally, expand column bindings to get the overlaps for ?x. cc.expand_column_bindings(); - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), EmptyBecause::TypeMismatch(x.clone(), unit_type_set(ValueType::Ref), ValueType::Boolean)); } + + #[test] + fn ensure_extracted_types_is_cleared() { + let query = r#"[:find ?e ?v :where [_ _ ?v] [?e :foo/bar ?v]]"#; + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + let e = Variable::from_valid_name("?e"); + let v = Variable::from_valid_name("?v"); + let cc = alg(&schema, query); + assert_eq!(cc.known_types.get(&e), Some(&unit_type_set(ValueType::Ref))); + assert_eq!(cc.known_types.get(&v), Some(&unit_type_set(ValueType::Boolean))); + assert!(!cc.extracted_types.contains_key(&e)); + assert!(!cc.extracted_types.contains_key(&v)); + } } diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index 4242d9a0..031d20be 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -140,7 +140,7 @@ mod testing { value: PatternValuePlace::Variable(y.clone()), tx: PatternNonValuePlace::Placeholder, }); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); let op = PlainSymbol::new("<"); let comp = NumericComparison::from_datalog_operator(op.plain_name()).unwrap(); @@ -150,11 +150,11 @@ mod testing { FnArg::Variable(Variable::from_valid_name("?y")), FnArg::EntidOrInteger(10), ]}).is_ok()); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); // Finally, expand column bindings to get the overlaps for ?x. cc.expand_column_bindings(); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); // After processing those two clauses, we know that ?y must be numeric, but not exactly // which type it must be. @@ -200,7 +200,7 @@ mod testing { value: PatternValuePlace::Variable(y.clone()), tx: PatternNonValuePlace::Placeholder, }); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); let op = PlainSymbol::new(">="); let comp = NumericComparison::from_datalog_operator(op.plain_name()).unwrap(); @@ -210,7 +210,7 @@ mod testing { FnArg::Variable(Variable::from_valid_name("?y")), FnArg::EntidOrInteger(10), ]}).is_ok()); - assert!(!cc.is_known_empty); + assert!(!cc.is_known_empty()); cc.apply_pattern(&schema, Pattern { source: None, entity: PatternNonValuePlace::Variable(x.clone()), @@ -222,7 +222,7 @@ mod testing { // Finally, expand column bindings to get the overlaps for ?x. cc.expand_column_bindings(); - assert!(cc.is_known_empty); + assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), EmptyBecause::TypeMismatch(y.clone(), vec![ValueType::Double, ValueType::Long].into_iter() diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index a0e90db3..8017de5e 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -63,8 +63,9 @@ impl AlgebraicQuery { }; } + #[inline] pub fn is_known_empty(&self) -> bool { - self.cc.is_known_empty + self.cc.is_known_empty() } } @@ -77,6 +78,8 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { for where_clause in where_clauses { cc.apply_clause(schema, where_clause)?; } + cc.expand_column_bindings(); + cc.prune_extracted_types(); let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None }; Ok(AlgebraicQuery { diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 121e5f85..8d98ddae 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -100,7 +100,7 @@ impl QualifiedAlias { } } -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Clone)] pub enum QueryValue { Column(QualifiedAlias), Entid(Entid), @@ -233,16 +233,28 @@ impl IntoIterator for ColumnIntersection { } impl ColumnIntersection { + #[inline] pub fn len(&self) -> usize { self.0.len() } + #[inline] pub fn is_empty(&self) -> bool { self.0.is_empty() } + #[inline] + pub fn add(&mut self, constraint: ColumnConstraintOrAlternation) { + self.0.push(constraint); + } + + #[inline] pub fn add_intersection(&mut self, constraint: ColumnConstraint) { - self.0.push(ColumnConstraintOrAlternation::Constraint(constraint)); + self.add(ColumnConstraintOrAlternation::Constraint(constraint)); + } + + pub fn append(&mut self, other: &mut Self) { + self.0.append(&mut other.0) } } @@ -301,6 +313,7 @@ impl Debug for ColumnConstraint { pub enum EmptyBecause { // Var, existing, desired. TypeMismatch(Variable, HashSet, ValueType), + NoValidTypes(Variable), NonNumericArgument, NonStringFulltextValue, UnresolvedIdent(NamespacedKeyword), @@ -319,6 +332,9 @@ impl Debug for EmptyBecause { write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", var, desired, existing) }, + &NoValidTypes(ref var) => { + write!(f, "Type mismatch: {:?} has no valid types", var) + }, &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index f5b6a8d9..892b3f63 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -160,11 +160,7 @@ def_parser!(Where, or_clause, WhereClause, { .of_exactly(Where::or() .with(many1(Where::or_where_clause())) .map(|clauses| { - WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: clauses, - }) + WhereClause::OrJoin(OrJoin::new(UnifyVars::Implicit, clauses)) })) }); @@ -174,11 +170,7 @@ def_parser!(Where, or_join_clause, WhereClause, { .with(Where::rule_vars()) .and(many1(Where::or_where_clause())) .map(|(vars, clauses)| { - WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Explicit(vars), - clauses: clauses, - }) + WhereClause::OrJoin(OrJoin::new(UnifyVars::Explicit(vars), clauses)) })) }); @@ -508,17 +500,15 @@ mod test { edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_clause, input, WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![OrWhereClause::Clause( - WhereClause::Pattern(Pattern { - source: None, - entity: PatternNonValuePlace::Variable(variable(e)), - attribute: PatternNonValuePlace::Variable(variable(a)), - value: PatternValuePlace::Variable(variable(v)), - tx: PatternNonValuePlace::Placeholder, - }))], - })); + OrJoin::new(UnifyVars::Implicit, + vec![OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(variable(e)), + attribute: PatternNonValuePlace::Variable(variable(a)), + value: PatternValuePlace::Variable(variable(v)), + tx: PatternNonValuePlace::Placeholder, + }))]))); } #[test] @@ -535,17 +525,15 @@ mod test { edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_join_clause, input, WhereClause::OrJoin( - OrJoin { - unify_vars: UnifyVars::Explicit(vec![variable(e.clone())]), - clauses: vec![OrWhereClause::Clause( - WhereClause::Pattern(Pattern { - source: None, - entity: PatternNonValuePlace::Variable(variable(e)), - attribute: PatternNonValuePlace::Variable(variable(a)), - value: PatternValuePlace::Variable(variable(v)), - tx: PatternNonValuePlace::Placeholder, - }))], - })); + OrJoin::new(UnifyVars::Explicit(vec![variable(e.clone())]), + vec![OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(variable(e)), + attribute: PatternNonValuePlace::Variable(variable(a)), + value: PatternValuePlace::Variable(variable(v)), + tx: PatternNonValuePlace::Placeholder, + }))]))); } #[test] diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs index 36a6064b..03db6bd0 100644 --- a/query-parser/tests/find_tests.rs +++ b/query-parser/tests/find_tests.rs @@ -70,9 +70,9 @@ fn can_parse_simple_or() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Implicit, + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -90,7 +90,7 @@ fn can_parse_simple_or() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), ]); } @@ -103,9 +103,9 @@ fn can_parse_unit_or_join() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -115,7 +115,7 @@ fn can_parse_unit_or_join() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), ]); } @@ -128,9 +128,9 @@ fn can_parse_simple_or_join() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -148,7 +148,7 @@ fn can_parse_simple_or_join() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), ]); } @@ -166,9 +166,9 @@ fn can_parse_simple_or_and_join() { FindSpec::FindScalar(Element::Variable(Variable::from_valid_name("?x")))); assert_eq!(p.where_clauses, vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Implicit, + vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -179,9 +179,9 @@ fn can_parse_simple_or_and_join() { })), OrWhereClause::And( vec![ - WhereClause::OrJoin(OrJoin { - unify_vars: UnifyVars::Implicit, - clauses: vec![ + WhereClause::OrJoin(OrJoin::new( + UnifyVars::Implicit, + vec![ OrWhereClause::Clause(WhereClause::Pattern(Pattern { source: None, entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")), @@ -197,7 +197,7 @@ fn can_parse_simple_or_and_join() { tx: PatternNonValuePlace::Placeholder, })), ], - }), + )), WhereClause::Pred(Predicate { operator: PlainSymbol::new("<"), args: vec![ FnArg::Variable(Variable::from_valid_name("?y")), FnArg::EntidOrInteger(1), @@ -205,6 +205,6 @@ fn can_parse_simple_or_and_join() { ], ) ], - }), + )), ]); } diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index e6b4b375..4e72334a 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -8,21 +8,12 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -#![allow(dead_code, unused_imports)] - use mentat_core::{ SQLValueType, TypedValue, ValueType, }; -use mentat_query::{ - Element, - FindSpec, - PlainSymbol, - Variable, -}; - use mentat_query_algebrizer::{ AlgebraicQuery, ColumnAlternation, @@ -31,10 +22,8 @@ use mentat_query_algebrizer::{ ColumnIntersection, ConjoiningClauses, DatomsColumn, - DatomsTable, QualifiedAlias, QueryValue, - SourceAlias, }; use mentat_query_projector::{ @@ -47,10 +36,8 @@ use mentat_query_sql::{ ColumnOrExpression, Constraint, FromClause, - Name, Op, Projection, - ProjectedColumn, SelectQuery, TableList, }; @@ -171,7 +158,7 @@ fn cc_to_select_query>>(projection: Projection, cc: Conjoini FromClause::TableList(TableList(cc.from)) }; - let limit = if cc.is_known_empty { Some(0) } else { limit.into() }; + let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() }; SelectQuery { distinct: distinct, projection: projection, @@ -187,7 +174,7 @@ fn cc_to_select_query>>(projection: Projection, cc: Conjoini /// Return a query that projects `1` if the `cc` matches the store, and returns no results /// if it doesn't. pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery { - if cc.is_known_empty { + if cc.is_known_empty() { // In this case we can produce a very simple query that returns no results. SelectQuery { distinct: false, diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 29ad6c75..ca967cfa 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -51,16 +51,20 @@ fn translate>>(schema: &Schema, input: &'static str, limit: select.query.to_sql_query().unwrap() } -fn prepopulated_schema() -> Schema { +fn prepopulated_typed_schema(foo_type: ValueType) -> Schema { let mut schema = Schema::default(); associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::String, + value_type: foo_type, ..Default::default() }); schema } +fn prepopulated_schema() -> Schema { + prepopulated_typed_schema(ValueType::String) +} + fn make_arg(name: &'static str, value: &'static str) -> (String, Rc) { (name.to_string(), Rc::new(value.to_string())) } @@ -215,13 +219,7 @@ fn test_numeric_less_than_unknown_attribute() { #[test] fn test_numeric_gte_known_attribute() { - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Double, - ..Default::default() - }); - + let schema = prepopulated_typed_schema(ValueType::Double); let input = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9"); @@ -230,15 +228,34 @@ fn test_numeric_gte_known_attribute() { #[test] fn test_numeric_not_equals_known_attribute() { - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Long, - ..Default::default() - }); - + let schema = prepopulated_typed_schema(ValueType::Long); let input = r#"[:find ?x . :where [?x :foo/bar ?y] [(!= ?y 12)]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1"); assert_eq!(args, vec![]); -} \ No newline at end of file +} + +#[test] +fn test_simple_or_join() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + associate_ident(&mut schema, NamespacedKeyword::new("page", "description"), 99); + for x in 97..100 { + add_attribute(&mut schema, x, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + } + + let input = r#"[:find [?url ?description] + :where + (or-join [?page] + [?page :page/url "http://foo.com/"] + [?page :page/title "Foo"]) + [?page :page/url ?url] + [?page :page/description ?description]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index b9037117..5c724c0f 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -568,6 +568,9 @@ impl OrWhereClause { pub struct OrJoin { pub unify_vars: UnifyVars, pub clauses: Vec, + + /// Caches the result of `collect_mentioned_variables`. + mentioned_vars: Option>, } #[allow(dead_code)] @@ -595,6 +598,14 @@ pub struct FindQuery { } impl OrJoin { + pub fn new(unify_vars: UnifyVars, clauses: Vec) -> OrJoin { + OrJoin { + unify_vars: unify_vars, + clauses: clauses, + mentioned_vars: None, + } + } + /// Return true if either the `OrJoin` is `UnifyVars::Implicit`, or if /// every variable mentioned inside the join is also mentioned in the `UnifyVars` list. pub fn is_fully_unified(&self) -> bool { @@ -605,8 +616,12 @@ impl OrJoin { // it would have failed validation. That allows us to simply compare counts here. // TODO: in debug mode, do a full intersection, and verify that our count check // returns the same results. - let mentioned = self.collect_mentioned_variables(); - vars.len() == mentioned.len() + // Use the cached list if we have one. + if let Some(ref mentioned) = self.mentioned_vars { + vars.len() == mentioned.len() + } else { + vars.len() == self.collect_mentioned_variables().len() + } } } } @@ -654,6 +669,28 @@ impl ContainsVariables for OrJoin { } } +impl OrJoin { + pub fn dismember(self) -> (Vec, BTreeSet) { + let vars = match self.mentioned_vars { + Some(m) => m, + None => self.collect_mentioned_variables(), + }; + (self.clauses, vars) + } + + pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet { + if self.mentioned_vars.is_none() { + let m = self.collect_mentioned_variables(); + self.mentioned_vars = Some(m); + } + if let Some(ref mentioned) = self.mentioned_vars { + mentioned + } else { + panic!() + } + } +} + impl ContainsVariables for Predicate { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { for arg in &self.args {