From 95a5326e23227df4ba38a5791be6a1e6cf302b8c Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 27 Mar 2017 19:34:02 -0700 Subject: [PATCH 01/11] Pre: move EmptyBecause into types.rs. --- query-algebrizer/src/cc.rs | 14 +------------- query-algebrizer/src/types.rs | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/query-algebrizer/src/cc.rs b/query-algebrizer/src/cc.rs index f09ecf4e..73c6cfcc 100644 --- a/query-algebrizer/src/cc.rs +++ b/query-algebrizer/src/cc.rs @@ -53,6 +53,7 @@ use types::{ ColumnConstraint, DatomsColumn, DatomsTable, + EmptyBecause, NumericComparison, QualifiedAlias, QueryValue, @@ -160,19 +161,6 @@ pub struct ConjoiningClauses { extracted_types: BTreeMap, } -#[derive(PartialEq)] -pub enum EmptyBecause { - // Var, existing, desired. - TypeMismatch(Variable, HashSet, ValueType), - NonNumericArgument, - UnresolvedIdent(NamespacedKeyword), - InvalidAttributeIdent(NamespacedKeyword), - InvalidAttributeEntid(Entid), - InvalidBinding(DatomsColumn, TypedValue), - ValueTypeMismatch(ValueType, TypedValue), - AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO -} - impl Debug for EmptyBecause { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { use self::EmptyBecause::*; diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index a726a3c7..274c1cac 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -8,6 +8,8 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +use std::collections::HashSet; + use std::fmt::{ Debug, Formatter, @@ -19,6 +21,12 @@ use mentat_core::{ TypedValue, ValueType, }; + +use mentat_query::{ + NamespacedKeyword, + Variable, +}; + /// This enum models the fixed set of default tables we have -- two /// tables and two views. 
#[derive(PartialEq, Eq, Clone, Copy, Debug)] @@ -204,3 +212,16 @@ impl Debug for ColumnConstraint { } } } + +#[derive(PartialEq)] +pub enum EmptyBecause { + // Var, existing, desired. + TypeMismatch(Variable, HashSet, ValueType), + NonNumericArgument, + UnresolvedIdent(NamespacedKeyword), + InvalidAttributeIdent(NamespacedKeyword), + InvalidAttributeEntid(Entid), + InvalidBinding(DatomsColumn, TypedValue), + ValueTypeMismatch(ValueType, TypedValue), + AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO +} \ No newline at end of file From d2e6b767c687b92806f4b913e7009f8cbfdefc37 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Thu, 30 Mar 2017 19:03:48 -0700 Subject: [PATCH 02/11] Pre: add mentat_core::utils::{ResultEffect,OptionEffect}. --- core/src/lib.rs | 1 + core/src/util.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 core/src/util.rs diff --git a/core/src/lib.rs b/core/src/lib.rs index aac31a71..ad78de81 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -473,3 +473,4 @@ mod test { } pub mod intern_set; +pub mod util; diff --git a/core/src/util.rs b/core/src/util.rs new file mode 100644 index 00000000..cd2205d2 --- /dev/null +++ b/core/src/util.rs @@ -0,0 +1,60 @@ + +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Side-effect chaining on `Result`. +pub trait ResultEffect { + /// Invoke `f` if `self` is `Ok`, returning `self`. 
+ fn when_ok(self, f: F) -> Self; + + /// Invoke `f` if `self` is `Err`, returning `self`. + fn when_err(self, f: F) -> Self; +} + +impl ResultEffect for Result { + fn when_ok(self, f: F) -> Self { + if self.is_ok() { + f(); + } + self + } + + fn when_err(self, f: F) -> Self { + if self.is_err() { + f(); + } + self + } +} + +/// Side-effect chaining on `Option`. +pub trait OptionEffect { + /// Invoke `f` if `self` is `None`, returning `self`. + fn when_none(self, f: F) -> Self; + + /// Invoke `f` if `self` is `Some`, returning `self`. + fn when_some(self, f: F) -> Self; +} + +impl OptionEffect for Option { + fn when_none(self, f: F) -> Self { + if self.is_none() { + f(); + } + self + } + + fn when_some(self, f: F) -> Self { + if self.is_some() { + f(); + } + self + } +} \ No newline at end of file From 439f3a228353700b60c7ce6629c535207bb87aed Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 27 Mar 2017 20:53:08 -0700 Subject: [PATCH 03/11] Pre: add some 'am I a pattern?' helper predicates to clause types. 
--- query/src/lib.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/query/src/lib.rs b/query/src/lib.rs index dc3126d8..bc2735a7 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -504,12 +504,31 @@ pub enum UnifyVars { Explicit(Vec), } +impl WhereClause { + pub fn is_pattern(&self) -> bool { + match self { + &WhereClause::Pattern(_) => true, + _ => false, + } + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub enum OrWhereClause { Clause(WhereClause), And(Vec), } +impl OrWhereClause { + pub fn is_pattern_or_patterns(&self) -> bool { + match self { + &OrWhereClause::Clause(WhereClause::Pattern(_)) => true, + &OrWhereClause::And(ref clauses) => clauses.iter().all(|clause| clause.is_pattern()), + _ => false, + } + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub struct OrJoin { pub unify_vars: UnifyVars, From 460fdac25265ad851f07e7167b0e4f2751c19353 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Wed, 29 Mar 2017 14:11:00 -0700 Subject: [PATCH 04/11] Pre: add Variable::from_valid_name, TypedValue::{typed_string,typed_ns_keyword}. --- core/src/lib.rs | 20 +++++++++++++++++--- query/src/lib.rs | 23 ++++++++++++++++++++++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/core/src/lib.rs b/core/src/lib.rs index ad78de81..0cbc365f 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -93,6 +93,20 @@ impl TypedValue { &TypedValue::Keyword(_) => ValueType::Keyword, } } + + /// Construct a new `TypedValue::Keyword` instance by cloning the provided + /// values. This is expensive, so this might + /// be best limited to tests. + pub fn typed_ns_keyword(ns: &str, name: &str) -> TypedValue { + TypedValue::Keyword(NamespacedKeyword::new(ns, name)) + } + + /// Construct a new `TypedValue::String` instance by cloning the provided + /// value. This is expensive, so this might + /// be best limited to tests. 
+ pub fn typed_string(s: &str) -> TypedValue { + TypedValue::String(s.to_string()) + } } // Put this here rather than in `db` simply because it's widely needed. @@ -141,9 +155,9 @@ impl SQLValueType for ValueType { fn test_typed_value() { assert!(TypedValue::Boolean(false).is_congruent_with(None)); assert!(TypedValue::Boolean(false).is_congruent_with(ValueType::Boolean)); - assert!(!TypedValue::String("foo".to_string()).is_congruent_with(ValueType::Boolean)); - assert!(TypedValue::String("foo".to_string()).is_congruent_with(ValueType::String)); - assert!(TypedValue::String("foo".to_string()).is_congruent_with(None)); + assert!(!TypedValue::typed_string("foo").is_congruent_with(ValueType::Boolean)); + assert!(TypedValue::typed_string("foo").is_congruent_with(ValueType::String)); + assert!(TypedValue::typed_string("foo").is_congruent_with(None)); } /// Bit flags used in `flags0` column in temporary tables created during search, diff --git a/query/src/lib.rs b/query/src/lib.rs index bc2735a7..6fbff8f0 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -44,6 +44,27 @@ pub type SrcVarName = String; // Do not include the required syntactic #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Variable(pub PlainSymbol); +impl Variable { + pub fn as_str(&self) -> &str { + (self.0).0.as_str() + } + + pub fn to_string(&self) -> String { + (self.0).0.clone() + } + + pub fn name(&self) -> PlainSymbol { + self.0.clone() + } + + /// Return a new `Variable`, assuming that the provided string is a valid name. 
+ pub fn from_valid_name(name: &str) -> Variable { + let s = PlainSymbol::new(name); + assert!(s.is_var_symbol()); + Variable(s) + } +} + pub trait FromValue { fn from_value(v: &edn::Value) -> Option; } @@ -633,4 +654,4 @@ impl ContainsVariables for Pattern { acc_ref(acc, v) } } -} \ No newline at end of file +} From 997df0b776daa956e65f9c4709b8dcc0519d93a5 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 27 Mar 2017 19:35:39 -0700 Subject: [PATCH 05/11] Part 1: introduce ColumnIntersection and ColumnAlternation. This provides a limited form of OR and AND for column constraints, allowing simple 'or-join' queries to be expressed on a single table alias. --- query-algebrizer/src/cc.rs | 37 +++++++------- query-algebrizer/src/lib.rs | 3 ++ query-algebrizer/src/types.rs | 84 +++++++++++++++++++++++++++++++ query-sql/src/lib.rs | 22 ++++++++ query-translator/src/translate.rs | 29 +++++++++++ 5 files changed, 157 insertions(+), 18 deletions(-) diff --git a/query-algebrizer/src/cc.rs b/query-algebrizer/src/cc.rs index 73c6cfcc..d1f34e3c 100644 --- a/query-algebrizer/src/cc.rs +++ b/query-algebrizer/src/cc.rs @@ -51,6 +51,7 @@ use errors::{ use types::{ ColumnConstraint, + ColumnIntersection, DatomsColumn, DatomsTable, EmptyBecause, @@ -129,7 +130,7 @@ pub struct ConjoiningClauses { pub from: Vec, /// A list of fragments that can be joined by `AND`. - pub wheres: Vec, + pub wheres: ColumnIntersection, /// A map from var to qualified columns. Used to project. 
pub column_bindings: BTreeMap>, @@ -218,7 +219,7 @@ impl Default for ConjoiningClauses { empty_because: None, aliaser: default_table_aliaser(), from: vec![], - wheres: vec![], + wheres: ColumnIntersection::default(), input_variables: BTreeSet::new(), column_bindings: BTreeMap::new(), value_bindings: BTreeMap::new(), @@ -318,11 +319,11 @@ impl ConjoiningClauses { } pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { - self.wheres.push(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) + self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) } pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) { - self.wheres.push(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) + self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) } pub fn constrain_attribute(&mut self, table: TableAlias, attribute: Entid) { @@ -330,7 +331,7 @@ impl ConjoiningClauses { } pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) { - self.wheres.push(ColumnConstraint::Equals( + self.wheres.also(ColumnConstraint::Equals( QualifiedAlias(table, DatomsColumn::Value), QueryValue::PrimitiveLong(value))) } @@ -577,7 +578,7 @@ impl ConjoiningClauses { // TODO: if both primary and secondary are .v, should we make sure // the type tag columns also match? // We don't do so in the ClojureScript version. - self.wheres.push(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone()))); + self.wheres.also(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone()))); } } } @@ -827,7 +828,7 @@ impl ConjoiningClauses { } else { // It must be a keyword. 
self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, TypedValue::Keyword(kw.clone())); - self.wheres.push(ColumnConstraint::HasType(col.clone(), ValueType::Keyword)); + self.wheres.also(ColumnConstraint::HasType(col.clone(), ValueType::Keyword)); }; }, PatternValuePlace::Constant(ref c) => { @@ -863,7 +864,7 @@ impl ConjoiningClauses { // Because everything we handle here is unambiguous, we generate a single type // restriction from the value type of the typed value. if value_type.is_none() { - self.wheres.push(ColumnConstraint::HasType(col.clone(), typed_value_type)); + self.wheres.also(ColumnConstraint::HasType(col.clone(), typed_value_type)); } }, @@ -941,7 +942,7 @@ impl ConjoiningClauses { left: left, right: right, }; - self.wheres.push(constraint); + self.wheres.also(constraint); Ok(()) } } @@ -1059,7 +1060,7 @@ mod testing { assert_eq!(cc.wheres, vec![ ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), - ]); + ].into()); } #[test] @@ -1097,7 +1098,7 @@ mod testing { assert_eq!(cc.wheres, vec![ ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), ColumnConstraint::HasType("datoms00".to_string(), ValueType::Boolean), - ]); + ].into()); } /// This test ensures that we do less work if we know the attribute thanks to a var lookup. @@ -1140,7 +1141,7 @@ mod testing { assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); assert_eq!(cc.wheres, vec![ ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), - ]); + ].into()); } /// Queries that bind non-entity values to entity places can't return results. @@ -1198,7 +1199,7 @@ mod testing { // ?x is bound to datoms0.e. assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); - assert_eq!(cc.wheres, vec![]); + assert_eq!(cc.wheres, vec![].into()); } /// This test ensures that we query all_datoms if we're looking for a string. 
@@ -1237,7 +1238,7 @@ mod testing { assert_eq!(cc.wheres, vec![ ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("hello".to_string()))), ColumnConstraint::HasType("all_datoms00".to_string(), ValueType::String), - ]); + ].into()); } #[test] @@ -1310,7 +1311,7 @@ mod testing { ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("idgoeshere".to_string()))), ColumnConstraint::Equals(d1_a, QueryValue::Entid(99)), ColumnConstraint::Equals(d0_e, QueryValue::Column(d1_e)), - ]); + ].into()); } #[test] @@ -1346,7 +1347,7 @@ mod testing { assert_eq!(cc.wheres, vec![ ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), - ]); + ].into()); // There is no binding for ?y. assert!(!cc.column_bindings.contains_key(&y)); @@ -1466,11 +1467,11 @@ mod testing { let clauses = cc.wheres; assert_eq!(clauses.len(), 1); - assert_eq!(clauses[0], ColumnConstraint::NumericInequality { + assert_eq!(clauses.0[0], ColumnConstraint::NumericInequality { operator: NumericComparison::LessThan, left: QueryValue::Column(cc.column_bindings.get(&y).unwrap()[0].clone()), right: QueryValue::TypedValue(TypedValue::Long(10)), - }); + }.into()); } #[test] diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index d6d92417..51832d26 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -93,7 +93,10 @@ pub use cc::{ }; pub use types::{ + ColumnAlternation, ColumnConstraint, + ColumnConstraintOrAlternation, + ColumnIntersection, DatomsColumn, DatomsTable, QualifiedAlias, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 274c1cac..3d1f2ce6 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -194,6 +194,90 @@ pub enum ColumnConstraint { HasType(TableAlias, ValueType), } +#[derive(PartialEq, Eq, Debug)] +pub enum ColumnConstraintOrAlternation { + Constraint(ColumnConstraint), + 
Alternation(ColumnAlternation), +} + +impl From for ColumnConstraintOrAlternation { + fn from(thing: ColumnConstraint) -> Self { + ColumnConstraintOrAlternation::Constraint(thing) + } +} + +/// A `ColumnIntersection` constraint is satisfied if all of its inner constraints are satisfied. +/// An empty intersection is always satisfied. +#[derive(PartialEq, Eq)] +pub struct ColumnIntersection(pub Vec); + +impl From> for ColumnIntersection { + fn from(thing: Vec) -> Self { + ColumnIntersection(thing.into_iter().map(|x| x.into()).collect()) + } +} + +impl Default for ColumnIntersection { + fn default() -> Self { + ColumnIntersection(vec![]) + } +} + +impl IntoIterator for ColumnIntersection { + type Item = ColumnConstraintOrAlternation; + type IntoIter = ::std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl ColumnIntersection { + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn also(&mut self, constraint: ColumnConstraint) { + self.0.push(ColumnConstraintOrAlternation::Constraint(constraint)); + } +} + +/// A `ColumnAlternation` constraint is satisfied if at least one of its inner constraints is +/// satisfied. An empty `ColumnAlternation` is never satisfied. 
+#[derive(PartialEq, Eq, Debug)] +pub struct ColumnAlternation(pub Vec); + +impl Default for ColumnAlternation { + fn default() -> Self { + ColumnAlternation(vec![]) + } +} + +impl IntoIterator for ColumnAlternation { + type Item = ColumnIntersection; + type IntoIter = ::std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl ColumnAlternation { + pub fn instead(&mut self, intersection: ColumnIntersection) { + self.0.push(intersection); + } +} + +impl Debug for ColumnIntersection { + fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { + write!(f, "{:?}", self.0) + } +} + impl Debug for ColumnConstraint { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { use self::ColumnConstraint::*; diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index d8b7c343..1d3d7869 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -81,6 +81,9 @@ pub enum Constraint { left: ColumnOrExpression, right: ColumnOrExpression, }, + Or { + constraints: Vec, + }, And { constraints: Vec, }, @@ -260,6 +263,11 @@ impl QueryFragment for Constraint { }, &And { ref constraints } => { + // An empty intersection is true. + if constraints.is_empty() { + out.push_sql("1"); + return Ok(()) + } out.push_sql("("); interpose!(constraint, constraints, { constraint.push_sql(out)? }, @@ -268,6 +276,20 @@ impl QueryFragment for Constraint { Ok(()) }, + &Or { ref constraints } => { + // An empty alternation is false. + if constraints.is_empty() { + out.push_sql("0"); + return Ok(()) + } + out.push_sql("("); + interpose!(constraint, constraints, + { constraint.push_sql(out)? 
}, + { out.push_sql(" OR ") }); + out.push_sql(")"); + Ok(()) + } + &In { ref left, ref list } => { left.push_sql(out)?; out.push_sql(" IN ("); diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 7831a450..e6b4b375 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -25,7 +25,10 @@ use mentat_query::{ use mentat_query_algebrizer::{ AlgebraicQuery, + ColumnAlternation, ColumnConstraint, + ColumnConstraintOrAlternation, + ColumnIntersection, ConjoiningClauses, DatomsColumn, DatomsTable, @@ -66,6 +69,32 @@ impl ToColumn for QualifiedAlias { } } +impl ToConstraint for ColumnIntersection { + fn to_constraint(self) -> Constraint { + Constraint::And { + constraints: self.into_iter().map(|x| x.to_constraint()).collect() + } + } +} + +impl ToConstraint for ColumnAlternation { + fn to_constraint(self) -> Constraint { + Constraint::Or { + constraints: self.into_iter().map(|x| x.to_constraint()).collect() + } + } +} + +impl ToConstraint for ColumnConstraintOrAlternation { + fn to_constraint(self) -> Constraint { + use self::ColumnConstraintOrAlternation::*; + match self { + Alternation(alt) => alt.to_constraint(), + Constraint(c) => c.to_constraint(), + } + } +} + impl ToConstraint for ColumnConstraint { fn to_constraint(self) -> Constraint { use self::ColumnConstraint::*; From 01ca0ae5c198a426a889791597e188e6fb30137f Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 27 Mar 2017 20:36:03 -0700 Subject: [PATCH 06/11] Part 2: add an EmptyBecause case for fulltext/non-string type mismatch. 
--- query-algebrizer/src/cc.rs | 3 +++ query-algebrizer/src/types.rs | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/query-algebrizer/src/cc.rs b/query-algebrizer/src/cc.rs index d1f34e3c..549976ef 100644 --- a/query-algebrizer/src/cc.rs +++ b/query-algebrizer/src/cc.rs @@ -173,6 +173,9 @@ impl Debug for EmptyBecause { &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, + &NonStringFulltextValue => { + write!(f, "Non-string argument for fulltext attribute") + }, &UnresolvedIdent(ref kw) => { write!(f, "Couldn't resolve keyword {}", kw) }, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 3d1f2ce6..6214fa19 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -297,11 +297,12 @@ impl Debug for ColumnConstraint { } } -#[derive(PartialEq)] +#[derive(PartialEq, Clone)] pub enum EmptyBecause { // Var, existing, desired. TypeMismatch(Variable, HashSet, ValueType), NonNumericArgument, + NonStringFulltextValue, UnresolvedIdent(NamespacedKeyword), InvalidAttributeIdent(NamespacedKeyword), InvalidAttributeEntid(Entid), From ce3c4f0dcabf99907f778bb8480a3ed6ab1c36ed Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Wed, 29 Mar 2017 08:08:00 -0700 Subject: [PATCH 07/11] Part 3: have table_for_places return a Result, not an Option. 
--- query-algebrizer/src/cc.rs | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/query-algebrizer/src/cc.rs b/query-algebrizer/src/cc.rs index 549976ef..d7153f3f 100644 --- a/query-algebrizer/src/cc.rs +++ b/query-algebrizer/src/cc.rs @@ -430,31 +430,31 @@ impl ConjoiningClauses { schema.get_entid(&ident) } - fn table_for_attribute_and_value<'s, 'a>(&self, attribute: &'s Attribute, value: &'a PatternValuePlace) -> Option { + fn table_for_attribute_and_value<'s, 'a>(&self, attribute: &'s Attribute, value: &'a PatternValuePlace) -> ::std::result::Result { if attribute.fulltext { match value { &PatternValuePlace::Placeholder => - Some(DatomsTable::Datoms), // We don't need the value. + Ok(DatomsTable::Datoms), // We don't need the value. // TODO: an existing non-string binding can cause this pattern to fail. &PatternValuePlace::Variable(_) => - Some(DatomsTable::AllDatoms), + Ok(DatomsTable::AllDatoms), &PatternValuePlace::Constant(NonIntegerConstant::Text(_)) => - Some(DatomsTable::AllDatoms), + Ok(DatomsTable::AllDatoms), _ => { // We can't succeed if there's a non-string constant value for a fulltext // field. - None - } + Err(EmptyBecause::NonStringFulltextValue) + }, } } else { - Some(DatomsTable::Datoms) + Ok(DatomsTable::Datoms) } } - fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a PatternValuePlace) -> Option { + fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a PatternValuePlace) -> ::std::result::Result { // If the value is known to be non-textual, we can simply use the regular datoms // table (TODO: and exclude on `index_fulltext`!). // @@ -463,7 +463,7 @@ impl ConjoiningClauses { // // If the value is a variable or string, we must use `all_datoms`, or do the join // ourselves, because we'll need to either extract or compare on the string. - Some( + Ok( match value { // TODO: see if the variable is projected, aggregated, or compared elsewhere in // the query. 
If it's not, we don't need to use all_datoms here. @@ -484,17 +484,17 @@ impl ConjoiningClauses { /// Decide which table to use for the provided attribute and value. /// If the attribute input or value binding doesn't name an attribute, or doesn't name an - /// attribute that is congruent with the supplied value, we mark the CC as known-empty and - /// return `None`. - fn table_for_places<'s, 'a>(&mut self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> Option { + /// attribute that is congruent with the supplied value, we return an `EmptyBecause`. + /// The caller is responsible for marking the CC as known-empty if this is a fatal failure. + fn table_for_places<'s, 'a>(&self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> ::std::result::Result { match attribute { &PatternNonValuePlace::Ident(ref kw) => schema.attribute_for_ident(kw) - .when_not(|| self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(kw.clone()))) + .ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.clone())) .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), &PatternNonValuePlace::Entid(id) => schema.attribute_for_entid(id) - .when_not(|| self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(id))) + .ok_or_else(|| EmptyBecause::InvalidAttributeEntid(id)) .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), // TODO: In a prepared context, defer this decision until a second algebrizing phase. // #278. @@ -513,18 +513,15 @@ impl ConjoiningClauses { Some(TypedValue::Keyword(ref kw)) => // Don't recurse: avoid needing to clone the keyword. 
schema.attribute_for_ident(kw) - .when_not(|| self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(kw.clone()))) + .ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.clone())) .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), Some(v) => { // This pattern cannot match: the caller has bound a non-entity value to an - // attribute place. Return `None` and invalidate this CC. - self.mark_known_empty(EmptyBecause::InvalidBinding(DatomsColumn::Attribute, - v.clone())); - None + // attribute place. + Err(EmptyBecause::InvalidBinding(DatomsColumn::Attribute, v.clone())) }, } }, - } } @@ -534,8 +531,11 @@ impl ConjoiningClauses { /// `is_known_empty`. fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option { self.table_for_places(schema, &pattern.attribute, &pattern.value) - .when_not(|| assert!(self.is_known_empty)) // table_for_places should have flipped this. + .map_err(|reason| { + self.mark_known_empty(reason); + }) .map(|table| SourceAlias(table, (self.aliaser)(table))) + .ok() } fn get_attribute<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option<&'s Attribute> { From 72eeedec746fb8eea40404b11bce0a05a58117d4 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 28 Mar 2017 17:38:01 -0700 Subject: [PATCH 08/11] Part 4: add OrJoin::is_fully_unified. This allows us to tell if all the variables in a valid `or` join are to be unified, which is necessary for simple joins. --- query/src/lib.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/query/src/lib.rs b/query/src/lib.rs index 6fbff8f0..40ddb06b 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -580,6 +580,24 @@ pub struct FindQuery { // TODO: in_rules; } +impl OrJoin { + /// Return true if either the `OrJoin` is `UnifyVars::Implicit`, or if + /// every variable mentioned inside the join is also mentioned in the `UnifyVars` list. 
+ pub fn is_fully_unified(&self) -> bool { + match &self.unify_vars { + &UnifyVars::Implicit => true, + &UnifyVars::Explicit(ref vars) => { + // We know that the join list must be a subset of the vars in the pattern, or + // it would have failed validation. That allows us to simply compare counts here. + // TODO: in debug mode, do a full intersection, and verify that our count check + // returns the same results. + let mentioned = self.collect_mentioned_variables(); + vars.len() == mentioned.len() + } + } + } +} + pub trait ContainsVariables { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet); fn collect_mentioned_variables(&self) -> BTreeSet { From 9e5c735460f80665bd91fc7abc21cd76556cd937 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 27 Mar 2017 20:34:56 -0700 Subject: [PATCH 09/11] Part 5: split cc.rs into a 'clauses' module. mod.rs defines the module and ConjoiningClauses itself, complete with methods to record facts and ask it questions. pattern.rs, predicate.rs, resolve.rs, and or.rs include particular functionality around accumulating certain kinds of patterns. Only `or.rs` includes significant new code; the rest is just split. 
--- query-algebrizer/src/cc.rs | 1621 --------------------- query-algebrizer/src/clauses/mod.rs | 593 ++++++++ query-algebrizer/src/clauses/or.rs | 80 + query-algebrizer/src/clauses/pattern.rs | 814 +++++++++++ query-algebrizer/src/clauses/predicate.rs | 233 +++ query-algebrizer/src/clauses/resolve.rs | 87 ++ query-algebrizer/src/lib.rs | 10 +- query-algebrizer/src/types.rs | 37 + query-algebrizer/src/validate.rs | 2 - 9 files changed, 1849 insertions(+), 1628 deletions(-) delete mode 100644 query-algebrizer/src/cc.rs create mode 100644 query-algebrizer/src/clauses/mod.rs create mode 100644 query-algebrizer/src/clauses/or.rs create mode 100644 query-algebrizer/src/clauses/pattern.rs create mode 100644 query-algebrizer/src/clauses/predicate.rs create mode 100644 query-algebrizer/src/clauses/resolve.rs diff --git a/query-algebrizer/src/cc.rs b/query-algebrizer/src/cc.rs deleted file mode 100644 index d7153f3f..00000000 --- a/query-algebrizer/src/cc.rs +++ /dev/null @@ -1,1621 +0,0 @@ -// Copyright 2016 Mozilla -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use -// this file except in compliance with the License. You may obtain a copy of the -// License at http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -use std::fmt::{ - Debug, - Formatter, -}; - -use std::collections::{ - BTreeMap, - BTreeSet, - HashSet, -}; - -use std::collections::btree_map::Entry; - -use mentat_core::{ - Attribute, - Entid, - Schema, - TypedValue, - ValueType, -}; - -use mentat_query::{ - FnArg, - NamespacedKeyword, - NonIntegerConstant, - Pattern, - PatternNonValuePlace, - PatternValuePlace, - PlainSymbol, - Predicate, - SrcVar, - Variable, - WhereClause, -}; - -use errors::{ - Error, - ErrorKind, - Result, -}; - -use types::{ - ColumnConstraint, - ColumnIntersection, - DatomsColumn, - DatomsTable, - EmptyBecause, - NumericComparison, - QualifiedAlias, - QueryValue, - SourceAlias, - TableAlias, -}; - -use validate::validate_or_join; - -/// A thing that's capable of aliasing a table name for us. -/// This exists so that we can obtain predictable names in tests. -pub type TableAliaser = Box TableAlias>; - -pub fn default_table_aliaser() -> TableAliaser { - let mut i = -1; - Box::new(move |table| { - i += 1; - format!("{}{:02}", table.name(), i) - }) -} - -trait OptionEffect { - fn when_not(self, f: F) -> Option; -} - -impl OptionEffect for Option { - fn when_not(self, f: F) -> Option { - if self.is_none() { - f(); - } - self - } -} - -fn unit_type_set(t: ValueType) -> HashSet { - let mut s = HashSet::with_capacity(1); - s.insert(t); - s -} - -/// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`. -/// The topmost form in a query is a `ConjoiningClauses`. -/// -/// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`. -/// - Predicate clauses turn into the same, but with other functions. -/// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to -/// bind it to the outer variables, or adds simple `WHERE` clauses to the outer -/// clause. -/// - `not-join` is similar, but with explicit binding. -/// - `or` turns into a collection of `UNION`s inside a subquery, or a simple -/// alternation. 
-/// `or`'s documentation states that all clauses must include the same vars, -/// but that's an over-simplification: all clauses must refer to the external -/// unification vars. -/// The entire `UNION`-set is `JOIN`ed to any surrounding expressions per the `rule-vars` -/// clause, or the intersection of the vars in the two sides of the `JOIN`. -/// -/// Not yet done: -/// - Function clauses with bindings turn into: -/// * Subqueries. Perhaps less efficient? Certainly clearer. -/// * Projection expressions, if only used for output. -/// * Inline expressions? -///--------------------------------------------------------------------------------------- -pub struct ConjoiningClauses { - /// `true` if this set of clauses cannot yield results in the context of the current schema. - pub is_known_empty: bool, - pub empty_because: Option, - - /// A function used to generate an alias for a table -- e.g., from "datoms" to "datoms123". - aliaser: TableAliaser, - - /// A vector of source/alias pairs used to construct a SQL `FROM` list. - pub from: Vec, - - /// A list of fragments that can be joined by `AND`. - pub wheres: ColumnIntersection, - - /// A map from var to qualified columns. Used to project. - pub column_bindings: BTreeMap>, - - /// A list of variables mentioned in the enclosing query's :in clause. These must all be bound - /// before the query can be executed. TODO: clarify what this means for nested CCs. - pub input_variables: BTreeSet, - - /// In some situations -- e.g., when a query is being run only once -- we know in advance the - /// values bound to some or all variables. These can be substituted directly when the query is - /// algebrized. - /// - /// Value bindings must agree with `known_types`. If you write a query like - /// ```edn - /// [:find ?x :in $ ?val :where [?x :foo/int ?val]] - /// ``` - /// - /// and for `?val` provide `TypedValue::String("foo".to_string())`, the query will be known at - /// algebrizing time to be empty. 
- value_bindings: BTreeMap, - - /// A map from var to type. Whenever a var maps unambiguously to two different types, it cannot - /// yield results, so we don't represent that case here. If a var isn't present in the map, it - /// means that its type is not known in advance. - pub known_types: BTreeMap>, - - /// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime. - /// If a var isn't present in `known_types`, it should be present here. - extracted_types: BTreeMap, -} - -impl Debug for EmptyBecause { - fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { - use self::EmptyBecause::*; - match self { - &TypeMismatch(ref var, ref existing, ref desired) => { - write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", - var, desired, existing) - }, - &NonNumericArgument => { - write!(f, "Non-numeric argument in numeric place") - }, - &NonStringFulltextValue => { - write!(f, "Non-string argument for fulltext attribute") - }, - &UnresolvedIdent(ref kw) => { - write!(f, "Couldn't resolve keyword {}", kw) - }, - &InvalidAttributeIdent(ref kw) => { - write!(f, "{} does not name an attribute", kw) - }, - &InvalidAttributeEntid(entid) => { - write!(f, "{} is not an attribute", entid) - }, - &InvalidBinding(ref column, ref tv) => { - write!(f, "{:?} cannot name column {:?}", tv, column) - }, - &ValueTypeMismatch(value_type, ref typed_value) => { - write!(f, "Type mismatch: {:?} doesn't match attribute type {:?}", - typed_value, value_type) - }, - &AttributeLookupFailed => { - write!(f, "Attribute lookup failed") - }, - } - } -} - -impl Debug for ConjoiningClauses { - fn fmt(&self, fmt: &mut Formatter) -> ::std::fmt::Result { - fmt.debug_struct("ConjoiningClauses") - .field("is_known_empty", &self.is_known_empty) - .field("from", &self.from) - .field("wheres", &self.wheres) - .field("column_bindings", &self.column_bindings) - .field("input_variables", &self.input_variables) - .field("value_bindings", 
&self.value_bindings) - .field("known_types", &self.known_types) - .field("extracted_types", &self.extracted_types) - .finish() - } -} - -/// Basics. -impl Default for ConjoiningClauses { - fn default() -> ConjoiningClauses { - ConjoiningClauses { - is_known_empty: false, - empty_because: None, - aliaser: default_table_aliaser(), - from: vec![], - wheres: ColumnIntersection::default(), - input_variables: BTreeSet::new(), - column_bindings: BTreeMap::new(), - value_bindings: BTreeMap::new(), - known_types: BTreeMap::new(), - extracted_types: BTreeMap::new(), - } - } -} - -impl ConjoiningClauses { - #[allow(dead_code)] - fn with_value_bindings(bindings: BTreeMap) -> ConjoiningClauses { - let mut cc = ConjoiningClauses { - value_bindings: bindings, - ..Default::default() - }; - - // Pre-fill our type mappings with the types of the input bindings. - cc.known_types - .extend(cc.value_bindings.iter() - .map(|(k, v)| (k.clone(), unit_type_set(v.value_type())))); - cc - } -} - -impl ConjoiningClauses { - fn bound_value(&self, var: &Variable) -> Option { - self.value_bindings.get(var).cloned() - } - - /// Return a single `ValueType` if the given variable is known to have a precise type. - /// Returns `None` if the type of the variable is unknown. - /// Returns `None` if the type of the variable is known but not precise -- "double - /// or integer" isn't good enough. - pub fn known_type(&self, var: &Variable) -> Option { - match self.known_types.get(var) { - Some(types) if types.len() == 1 => types.iter().next().cloned(), - _ => None, - } - } - - pub fn bind_column_to_var(&mut self, schema: &Schema, table: TableAlias, column: DatomsColumn, var: Variable) { - // Do we have an external binding for this? - if let Some(bound_val) = self.bound_value(&var) { - // Great! Use that instead. - // We expect callers to do things like bind keywords here; we need to translate these - // before they hit our constraints. 
- // TODO: recognize when the valueType might be a ref and also translate entids there. - if column == DatomsColumn::Value { - self.constrain_column_to_constant(table, column, bound_val); - } else { - match bound_val { - TypedValue::Keyword(ref kw) => { - if let Some(entid) = self.entid_for_ident(schema, kw) { - self.constrain_column_to_entity(table, column, entid); - } else { - // Impossible. - // For attributes this shouldn't occur, because we check the binding in - // `table_for_places`/`alias_table`, and if it didn't resolve to a valid - // attribute then we should have already marked the pattern as empty. - self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone())); - } - }, - TypedValue::Ref(entid) => { - self.constrain_column_to_entity(table, column, entid); - }, - _ => { - // One can't bind an e, a, or tx to something other than an entity. - self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val)); - }, - } - } - - return; - } - - // Will we have an external binding for this? - // If so, we don't need to extract its type. We'll know it later. - let late_binding = self.input_variables.contains(&var); - - // If this is a value, and we don't already know its type or where - // to get its type, record that we can get it from this table. - let needs_type_extraction = - !late_binding && // Never need to extract for bound vars. - column == DatomsColumn::Value && // Never need to extract types for refs. - self.known_type(&var).is_none() && // Don't need to extract if we know a single type. - !self.extracted_types.contains_key(&var); // We're already extracting the type. - - let alias = QualifiedAlias(table, column); - - // If we subsequently find out its type, we'll remove this later -- see - // the removal in `constrain_var_to_type`. 
- if needs_type_extraction { - self.extracted_types.insert(var.clone(), alias.for_type_tag()); - } - self.column_bindings.entry(var).or_insert(vec![]).push(alias); - } - - pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { - self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) - } - - pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) { - self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) - } - - pub fn constrain_attribute(&mut self, table: TableAlias, attribute: Entid) { - self.constrain_column_to_entity(table, DatomsColumn::Attribute, attribute) - } - - pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) { - self.wheres.also(ColumnConstraint::Equals( - QualifiedAlias(table, DatomsColumn::Value), - QueryValue::PrimitiveLong(value))) - } - - /// Mark the given value as one of the set of numeric types. - fn constrain_var_to_numeric(&mut self, variable: Variable) { - let mut numeric_types = HashSet::with_capacity(2); - numeric_types.insert(ValueType::Double); - numeric_types.insert(ValueType::Long); - - let entry = self.known_types.entry(variable); - match entry { - Entry::Vacant(vacant) => { - vacant.insert(numeric_types); - }, - Entry::Occupied(mut occupied) => { - let narrowed: HashSet = numeric_types.intersection(occupied.get()).cloned().collect(); - match narrowed.len() { - 0 => { - // TODO: can't borrow as mutable more than once! - //self.mark_known_empty(EmptyBecause::TypeMismatch(occupied.key().clone(), occupied.get().clone(), ValueType::Double)); // I know… - }, - 1 => { - // Hooray! - self.extracted_types.remove(occupied.key()); - }, - _ => { - }, - }; - occupied.insert(narrowed); - }, - } - } - - /// Constrains the var if there's no existing type. 
- /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting - /// type already known. - fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { - // If this variable now has a known attribute, we can unhook extracted types for - // any other instances of that variable. - // For example, given - // - // ```edn - // [:find ?v :where [?x ?a ?v] [?y :foo/int ?v]] - // ``` - // - // we will initially choose to extract the type tag for `?v`, but on encountering - // the second pattern we can avoid that. - self.extracted_types.remove(&variable); - - // Is there an existing mapping for this variable? - // Any known inputs have already been added to known_types, and so if they conflict we'll - // spot it here. - if let Some(existing) = self.known_types.insert(variable.clone(), unit_type_set(this_type)) { - // There was an existing mapping. Does this type match? - if !existing.contains(&this_type) { - self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type)); - } - } - } - - /// Ensure that the given place has the correct types to be a tx-id. - /// Right now this is mostly unimplemented: we fail hard if anything but a placeholder is - /// present. - fn constrain_to_tx(&mut self, tx: &PatternNonValuePlace) { - match *tx { - PatternNonValuePlace::Placeholder => (), - _ => unimplemented!(), // TODO - } - } - - /// Ensure that the given place can be an entity, and is congruent with existing types. - /// This is used for `entity` and `attribute` places in a pattern. - fn constrain_to_ref(&mut self, value: &PatternNonValuePlace) { - // If it's a variable, record that it has the right type. - // Ident or attribute resolution errors (the only other check we need to do) will be done - // by the caller. 
- if let &PatternNonValuePlace::Variable(ref v) = value { - self.constrain_var_to_type(v.clone(), ValueType::Ref) - } - } - - fn mark_known_empty(&mut self, why: EmptyBecause) { - self.is_known_empty = true; - if self.empty_because.is_some() { - return; - } - println!("CC known empty: {:?}.", &why); // TODO: proper logging. - self.empty_because = Some(why); - } - - fn entid_for_ident<'s, 'a>(&self, schema: &'s Schema, ident: &'a NamespacedKeyword) -> Option { - schema.get_entid(&ident) - } - - fn table_for_attribute_and_value<'s, 'a>(&self, attribute: &'s Attribute, value: &'a PatternValuePlace) -> ::std::result::Result { - if attribute.fulltext { - match value { - &PatternValuePlace::Placeholder => - Ok(DatomsTable::Datoms), // We don't need the value. - - // TODO: an existing non-string binding can cause this pattern to fail. - &PatternValuePlace::Variable(_) => - Ok(DatomsTable::AllDatoms), - - &PatternValuePlace::Constant(NonIntegerConstant::Text(_)) => - Ok(DatomsTable::AllDatoms), - - _ => { - // We can't succeed if there's a non-string constant value for a fulltext - // field. - Err(EmptyBecause::NonStringFulltextValue) - }, - } - } else { - Ok(DatomsTable::Datoms) - } - } - - fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a PatternValuePlace) -> ::std::result::Result { - // If the value is known to be non-textual, we can simply use the regular datoms - // table (TODO: and exclude on `index_fulltext`!). - // - // If the value is a placeholder too, then we can walk the non-value-joined view, - // because we don't care about retrieving the fulltext value. - // - // If the value is a variable or string, we must use `all_datoms`, or do the join - // ourselves, because we'll need to either extract or compare on the string. - Ok( - match value { - // TODO: see if the variable is projected, aggregated, or compared elsewhere in - // the query. If it's not, we don't need to use all_datoms here. 
- &PatternValuePlace::Variable(ref v) => { - // Do we know that this variable can't be a string? If so, we don't need - // AllDatoms. None or String means it could be or definitely is. - match self.known_types.get(v).map(|types| types.contains(&ValueType::String)) { - Some(false) => DatomsTable::Datoms, - _ => DatomsTable::AllDatoms, - } - } - &PatternValuePlace::Constant(NonIntegerConstant::Text(_)) => - DatomsTable::AllDatoms, - _ => - DatomsTable::Datoms, - }) - } - - /// Decide which table to use for the provided attribute and value. - /// If the attribute input or value binding doesn't name an attribute, or doesn't name an - /// attribute that is congruent with the supplied value, we return an `EmptyBecause`. - /// The caller is responsible for marking the CC as known-empty if this is a fatal failure. - fn table_for_places<'s, 'a>(&self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> ::std::result::Result { - match attribute { - &PatternNonValuePlace::Ident(ref kw) => - schema.attribute_for_ident(kw) - .ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.clone())) - .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), - &PatternNonValuePlace::Entid(id) => - schema.attribute_for_entid(id) - .ok_or_else(|| EmptyBecause::InvalidAttributeEntid(id)) - .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), - // TODO: In a prepared context, defer this decision until a second algebrizing phase. - // #278. - &PatternNonValuePlace::Placeholder => - self.table_for_unknown_attribute(value), - &PatternNonValuePlace::Variable(ref v) => { - // See if we have a binding for the variable. - match self.bound_value(v) { - // TODO: In a prepared context, defer this decision until a second algebrizing phase. - // #278. - None => - self.table_for_unknown_attribute(value), - Some(TypedValue::Ref(id)) => - // Recurse: it's easy. 
- self.table_for_places(schema, &PatternNonValuePlace::Entid(id), value), - Some(TypedValue::Keyword(ref kw)) => - // Don't recurse: avoid needing to clone the keyword. - schema.attribute_for_ident(kw) - .ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.clone())) - .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), - Some(v) => { - // This pattern cannot match: the caller has bound a non-entity value to an - // attribute place. - Err(EmptyBecause::InvalidBinding(DatomsColumn::Attribute, v.clone())) - }, - } - }, - } - } - - /// Produce a (table, alias) pair to handle the provided pattern. - /// This is a mutating method because it mutates the aliaser function! - /// Note that if this function decides that a pattern cannot match, it will flip - /// `is_known_empty`. - fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option { - self.table_for_places(schema, &pattern.attribute, &pattern.value) - .map_err(|reason| { - self.mark_known_empty(reason); - }) - .map(|table| SourceAlias(table, (self.aliaser)(table))) - .ok() - } - - fn get_attribute<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option<&'s Attribute> { - match pattern.attribute { - PatternNonValuePlace::Entid(id) => - schema.attribute_for_entid(id), - PatternNonValuePlace::Ident(ref kw) => - schema.attribute_for_ident(kw), - _ => - None, - } - } - - fn get_value_type<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option { - self.get_attribute(schema, pattern).map(|x| x.value_type) - } -} - -/// Expansions. -impl ConjoiningClauses { - - /// Take the contents of `column_bindings` and generate inter-constraints for the appropriate - /// columns into `wheres`. 
- /// - /// For example, a bindings map associating a var to three places in the query, like - /// - /// ```edn - /// {?foo [datoms12.e datoms13.v datoms14.e]} - /// ``` - /// - /// produces two additional constraints: - /// - /// ```example - /// datoms12.e = datoms13.v - /// datoms12.e = datoms14.e - /// ``` - pub fn expand_column_bindings(&mut self) { - for cols in self.column_bindings.values() { - if cols.len() > 1 { - let ref primary = cols[0]; - let secondaries = cols.iter().skip(1); - for secondary in secondaries { - // TODO: if both primary and secondary are .v, should we make sure - // the type tag columns also match? - // We don't do so in the ClojureScript version. - self.wheres.also(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone()))); - } - } - } - } - - /// When a CC has accumulated all patterns, generate value_type_tag entries in `wheres` - /// to refine value types for which two things are true: - /// - /// - There are two or more different types with the same SQLite representation. E.g., - /// ValueType::Boolean shares a representation with Integer and Ref. - /// - There is no attribute constraint present in the CC. - /// - /// It's possible at this point for the space of acceptable type tags to not intersect: e.g., - /// for the query - /// - /// ```edn - /// [:find ?x :where - /// [?x ?y true] - /// [?z ?y ?x]] - /// ``` - /// - /// where `?y` must simultaneously be a ref-typed attribute and a boolean-typed attribute. This - /// function deduces that and calls `self.mark_known_empty`. #293. - pub fn expand_type_tags(&mut self) { - // TODO. - } -} - -/// Argument resolution. -impl ConjoiningClauses { - /// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete - /// constraint. - /// Additionally, do two things: - /// - Mark the pattern as known-empty if any argument is known non-numeric. - /// - Mark any variables encountered as numeric. 
- fn resolve_numeric_argument(&mut self, function: &PlainSymbol, position: usize, arg: FnArg) -> Result { - use self::FnArg::*; - match arg { - FnArg::Variable(var) => { - self.constrain_var_to_numeric(var.clone()); - self.column_bindings - .get(&var) - .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) - .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var))) - }, - // Can't be an entid. - EntidOrInteger(i) => Ok(QueryValue::TypedValue(TypedValue::Long(i))), - Ident(_) | - SrcVar(_) | - Constant(NonIntegerConstant::Boolean(_)) | - Constant(NonIntegerConstant::Text(_)) | - Constant(NonIntegerConstant::BigInteger(_)) => { - self.mark_known_empty(EmptyBecause::NonNumericArgument); - bail!(ErrorKind::NonNumericArgument(function.clone(), position)); - }, - Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), - } - } - - - /// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete - /// constraint. - #[allow(dead_code)] - fn resolve_argument(&self, arg: FnArg) -> Result { - use self::FnArg::*; - match arg { - FnArg::Variable(var) => { - self.column_bindings - .get(&var) - .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) - .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var))) - }, - EntidOrInteger(i) => Ok(QueryValue::PrimitiveLong(i)), - Ident(_) => unimplemented!(), // TODO - Constant(NonIntegerConstant::Boolean(val)) => Ok(QueryValue::TypedValue(TypedValue::Boolean(val))), - Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), - Constant(NonIntegerConstant::Text(s)) => Ok(QueryValue::TypedValue(TypedValue::String(s.clone()))), - Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), - SrcVar(_) => unimplemented!(), - } - } -} - -/// Application of patterns. -impl ConjoiningClauses { - - /// Apply the constraints in the provided pattern to this CC. 
- /// - /// This is a single-pass process, which means it is naturally incomplete, failing to take into - /// account all information spread across two patterns. - /// - /// If the constraints cannot be satisfied -- for example, if this pattern includes a numeric - /// attribute and a string value -- then the `is_known_empty` field on the CC is flipped and - /// the function returns. - /// - /// A pattern being impossible to satisfy isn't necessarily a bad thing -- this query might - /// have branched clauses that apply to different knowledge bases, and might refer to - /// vocabulary that isn't (yet) used in this one. - /// - /// Most of the work done by this function depends on the schema and ident maps in the DB. If - /// these change, then any work done is invalid. - /// - /// There's a lot more we can do here and later by examining the - /// attribute: - /// - /// - If it's unique, and we have patterns like - /// - /// [?x :foo/unique 5] [?x :foo/unique ?y] - /// - /// then we can either prove impossibility (the two objects must - /// be equal) or deduce identity and simplify the query. - /// - /// - The same, if it's cardinality-one and the entity is known. - /// - /// - If there's a value index on this attribute, we might want to - /// run this pattern early in the query. - /// - /// - A unique-valued attribute can sometimes be rewritten into an - /// existence subquery instead of a join. - fn apply_pattern_clause_for_alias<'s>(&mut self, schema: &'s Schema, pattern: &Pattern, alias: &SourceAlias) { - if self.is_known_empty { - return; - } - - // Process each place in turn, applying constraints. - // Both `e` and `a` must be entities, which is equivalent here - // to being typed as Ref. - // Sorry for the duplication; Rust makes it a pain to abstract this. - - // The transaction part of a pattern must be an entid, variable, or placeholder. 
- self.constrain_to_tx(&pattern.tx); - self.constrain_to_ref(&pattern.entity); - self.constrain_to_ref(&pattern.attribute); - - let ref col = alias.1; - - match pattern.entity { - PatternNonValuePlace::Placeholder => - // Placeholders don't contribute any column bindings, nor do - // they constrain the query -- there's no need to produce - // IS NOT NULL, because we don't store nulls in our schema. - (), - PatternNonValuePlace::Variable(ref v) => - self.bind_column_to_var(schema, col.clone(), DatomsColumn::Entity, v.clone()), - PatternNonValuePlace::Entid(entid) => - self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid), - PatternNonValuePlace::Ident(ref ident) => { - if let Some(entid) = self.entid_for_ident(schema, ident) { - self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid) - } else { - // A resolution failure means we're done here. - self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone())); - return; - } - } - } - - match pattern.attribute { - PatternNonValuePlace::Placeholder => - (), - PatternNonValuePlace::Variable(ref v) => - self.bind_column_to_var(schema, col.clone(), DatomsColumn::Attribute, v.clone()), - PatternNonValuePlace::Entid(entid) => { - if !schema.is_attribute(entid) { - // Furthermore, that entid must resolve to an attribute. If it doesn't, this - // query is meaningless. - self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(entid)); - return; - } - self.constrain_attribute(col.clone(), entid) - }, - PatternNonValuePlace::Ident(ref ident) => { - if let Some(entid) = self.entid_for_ident(schema, ident) { - self.constrain_attribute(col.clone(), entid); - - if !schema.is_attribute(entid) { - self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(ident.clone())); - return; - } - } else { - // A resolution failure means we're done here. - self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone())); - return; - } - } - } - - // Determine if the pattern's value type is known. 
- // We do so by examining the value place and the attribute. - // At this point it's possible that the type of the value is - // inconsistent with the attribute; in that case this pattern - // cannot return results, and we short-circuit. - let value_type = self.get_value_type(schema, pattern); - - match pattern.value { - PatternValuePlace::Placeholder => - (), - - PatternValuePlace::Variable(ref v) => { - if let Some(this_type) = value_type { - // Wouldn't it be nice if we didn't need to clone in the found case? - // It doesn't matter too much: collisons won't be too frequent. - self.constrain_var_to_type(v.clone(), this_type); - if self.is_known_empty { - return; - } - } - - self.bind_column_to_var(schema, col.clone(), DatomsColumn::Value, v.clone()); - }, - PatternValuePlace::EntidOrInteger(i) => - // If we know the valueType, then we can determine whether this is an entid or an - // integer. If we don't, then we must generate a more general query with a - // value_type_tag. - if let Some(ValueType::Ref) = value_type { - self.constrain_column_to_entity(col.clone(), DatomsColumn::Value, i); - } else { - // If we have a pattern like: - // - // `[123 ?a 1]` - // - // then `1` could be an entid (ref), a long, a boolean, or an instant. - // - // We represent these constraints during execution: - // - // - Constraining the value column to the plain numeric value '1'. - // - Constraining its type column to one of a set of types. - // - self.constrain_value_to_numeric(col.clone(), i); - }, - PatternValuePlace::IdentOrKeyword(ref kw) => { - // If we know the valueType, then we can determine whether this is an ident or a - // keyword. If we don't, then we must generate a more general query with a - // value_type_tag. - // We can also speculatively try to resolve it as an ident; if we fail, then we - // know it can only return results if treated as a keyword, and we can treat it as - // such. 
- if let Some(ValueType::Ref) = value_type { - if let Some(entid) = self.entid_for_ident(schema, kw) { - self.constrain_column_to_entity(col.clone(), DatomsColumn::Value, entid) - } else { - // A resolution failure means we're done here: this attribute must have an - // entity value. - self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone())); - return; - } - } else { - // It must be a keyword. - self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, TypedValue::Keyword(kw.clone())); - self.wheres.also(ColumnConstraint::HasType(col.clone(), ValueType::Keyword)); - }; - }, - PatternValuePlace::Constant(ref c) => { - // TODO: don't allocate. - let typed_value = c.clone().into_typed_value(); - if !typed_value.is_congruent_with(value_type) { - // If the attribute and its value don't match, the pattern must fail. - // We can never have a congruence failure if `value_type` is `None`, so we - // forcibly unwrap here. - let value_type = value_type.expect("Congruence failure but couldn't unwrap"); - let why = EmptyBecause::ValueTypeMismatch(value_type, typed_value); - self.mark_known_empty(why); - return; - } - - // TODO: if we don't know the type of the attribute because we don't know the - // attribute, we can actually work backwards to the set of appropriate attributes - // from the type of the value itself! #292. - let typed_value_type = typed_value.value_type(); - self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, typed_value); - - // If we can't already determine the range of values in the DB from the attribute, - // then we must also constrain the type tag. - // - // Input values might be: - // - // - A long. This is handled by EntidOrInteger. - // - A boolean. This is unambiguous. - // - A double. This is currently unambiguous, though note that SQLite will equate 5.0 with 5. - // - A string. This is unambiguous. - // - A keyword. This is unambiguous. 
- // - // Because everything we handle here is unambiguous, we generate a single type - // restriction from the value type of the typed value. - if value_type.is_none() { - self.wheres.also(ColumnConstraint::HasType(col.clone(), typed_value_type)); - } - - }, - } - - } - - pub fn apply_pattern<'s, 'p>(&mut self, schema: &'s Schema, pattern: Pattern) { - // For now we only support the default source. - match pattern.source { - Some(SrcVar::DefaultSrc) | None => (), - _ => unimplemented!(), - }; - - if let Some(alias) = self.alias_table(schema, &pattern) { - self.apply_pattern_clause_for_alias(schema, &pattern, &alias); - self.from.push(alias); - } else { - // We didn't determine a table, likely because there was a mismatch - // between an attribute and a value. - // We know we cannot return a result, so we short-circuit here. - self.mark_known_empty(EmptyBecause::AttributeLookupFailed); - return; - } - } -} - -/// Application of predicates. -impl ConjoiningClauses { - /// There are several kinds of predicates/functions in our Datalog: - /// - A limited set of binary comparison operators: < > <= >= !=. - /// These are converted into SQLite binary comparisons and some type constraints. - /// - A set of predicates like `fulltext` and `get-else` that are translated into - /// SQL `MATCH`es or joins, yielding bindings. - /// - In the future, some predicates that are implemented via function calls in SQLite. - /// - /// At present we have implemented only the five built-in comparison binary operators. - pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { - // Because we'll be growing the set of built-in predicates, handling each differently, - // and ultimately allowing user-specified predicates, we match on the predicate name first. 
- if let Some(op) = NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) { - self.apply_numeric_predicate(schema, op, predicate) - } else { - bail!(ErrorKind::UnknownFunction(predicate.operator.clone())) - } - } - - /// This function: - /// - Resolves variables and converts types to those more amenable to SQL. - /// - Ensures that the predicate functions name a known operator. - /// - Accumulates a `NumericInequality` constraint into the `wheres` list. - #[allow(unused_variables)] - pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { - if predicate.args.len() != 2 { - bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), predicate.args.len(), 2)); - } - - // Go from arguments -- parser output -- to columns or values. - // Any variables that aren't bound by this point in the linear processing of clauses will - // cause the application of the predicate to fail. - let mut args = predicate.args.into_iter(); - let left = self.resolve_numeric_argument(&predicate.operator, 0, args.next().unwrap())?; - let right = self.resolve_numeric_argument(&predicate.operator, 1, args.next().unwrap())?; - - // These arguments must be variables or numeric constants. - // TODO: generalize argument resolution and validation for different kinds of predicates: - // as we process `(< ?x 5)` we are able to check or deduce that `?x` is numeric, and either - // simplify the pattern or optimize the rest of the query. - // To do so needs a slightly more sophisticated representation of type constraints — a set, - // not a single `Option`. - - // TODO: static evaluation. #383. - let constraint = ColumnConstraint::NumericInequality { - operator: comparison, - left: left, - right: right, - }; - self.wheres.also(constraint); - Ok(()) - } -} - -impl ConjoiningClauses { - // This is here, rather than in `lib.rs`, because it's recursive: `or` can contain `or`, - // and so on. 
- pub fn apply_clause(&mut self, schema: &Schema, where_clause: WhereClause) -> Result<()> { - match where_clause { - WhereClause::Pattern(p) => { - self.apply_pattern(schema, p); - Ok(()) - }, - WhereClause::Pred(p) => { - self.apply_predicate(schema, p) - }, - WhereClause::OrJoin(o) => { - validate_or_join(&o) - // TODO: apply. - }, - _ => unimplemented!(), - } - } -} - -#[cfg(test)] -mod testing { - use super::*; - use mentat_core::attribute::Unique; - use mentat_query::PlainSymbol; - - fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { - schema.entid_map.insert(e, i.clone()); - schema.ident_map.insert(i.clone(), e); - } - - fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { - schema.schema_map.insert(e, a); - } - - #[test] - fn test_unknown_ident() { - let mut cc = ConjoiningClauses::default(); - let schema = Schema::default(); - - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), - tx: PatternNonValuePlace::Placeholder, - }); - - assert!(cc.is_known_empty); - } - - #[test] - fn test_unknown_attribute() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), - tx: PatternNonValuePlace::Placeholder, - }); - - assert!(cc.is_known_empty); - } - - #[test] - fn test_apply_simple_pattern() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, 
NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Boolean, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), - tx: PatternNonValuePlace::Placeholder, - }); - - // println!("{:#?}", cc); - - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); - - // After this, we know a lot of things: - assert!(!cc.is_known_empty); - assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); - - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); - - // ?x is bound to datoms0.e. - assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); - - // Our 'where' clauses are two: - // - datoms0.a = 99 - // - datoms0.v = true - // No need for a type tag constraint, because the attribute is known. 
- assert_eq!(cc.wheres, vec![ - ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), - ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), - ].into()); - } - - #[test] - fn test_apply_unattributed_pattern() { - let mut cc = ConjoiningClauses::default(); - let schema = Schema::default(); - - let x = Variable(PlainSymbol::new("?x")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Placeholder, - value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), - tx: PatternNonValuePlace::Placeholder, - }); - - // println!("{:#?}", cc); - - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); - - assert!(!cc.is_known_empty); - assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); - - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); - - // ?x is bound to datoms0.e. - assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); - - // Our 'where' clauses are two: - // - datoms0.v = true - // - datoms0.value_type_tag = boolean - // TODO: implement expand_type_tags. - assert_eq!(cc.wheres, vec![ - ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), - ColumnConstraint::HasType("datoms00".to_string(), ValueType::Boolean), - ].into()); - } - - /// This test ensures that we do less work if we know the attribute thanks to a var lookup. 
- #[test] - fn test_apply_unattributed_but_bound_pattern_with_returned() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Boolean, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let a = Variable(PlainSymbol::new("?a")); - let v = Variable(PlainSymbol::new("?v")); - - cc.input_variables.insert(a.clone()); - cc.value_bindings.insert(a.clone(), TypedValue::Keyword(NamespacedKeyword::new("foo", "bar"))); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Variable(a.clone()), - value: PatternValuePlace::Variable(v.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // println!("{:#?}", cc); - - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - - assert!(!cc.is_known_empty); - assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); - - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); - - // ?x is bound to datoms0.e. - assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); - assert_eq!(cc.wheres, vec![ - ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), - ].into()); - } - - /// Queries that bind non-entity values to entity places can't return results. 
- #[test] - fn test_bind_the_wrong_thing() { - let mut cc = ConjoiningClauses::default(); - let schema = Schema::default(); - - let x = Variable(PlainSymbol::new("?x")); - let a = Variable(PlainSymbol::new("?a")); - let v = Variable(PlainSymbol::new("?v")); - let hello = TypedValue::String("hello".to_string()); - - cc.input_variables.insert(a.clone()); - cc.value_bindings.insert(a.clone(), hello.clone()); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Variable(a.clone()), - value: PatternValuePlace::Variable(v.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - assert!(cc.is_known_empty); - assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(DatomsColumn::Attribute, hello)); - } - - - /// This test ensures that we query all_datoms if we're possibly retrieving a string. - #[test] - fn test_apply_unattributed_pattern_with_returned() { - let mut cc = ConjoiningClauses::default(); - let schema = Schema::default(); - - let x = Variable(PlainSymbol::new("?x")); - let a = Variable(PlainSymbol::new("?a")); - let v = Variable(PlainSymbol::new("?v")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Variable(a.clone()), - value: PatternValuePlace::Variable(v.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // println!("{:#?}", cc); - - let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); - - assert!(!cc.is_known_empty); - assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); - - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); - - // ?x is bound to datoms0.e. - assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); - assert_eq!(cc.wheres, vec![].into()); - } - - /// This test ensures that we query all_datoms if we're looking for a string. 
- #[test] - fn test_apply_unattributed_pattern_with_string_value() { - let mut cc = ConjoiningClauses::default(); - let schema = Schema::default(); - - let x = Variable(PlainSymbol::new("?x")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Placeholder, - value: PatternValuePlace::Constant(NonIntegerConstant::Text("hello".to_string())), - tx: PatternNonValuePlace::Placeholder, - }); - - // println!("{:#?}", cc); - - let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); - let d0_v = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Value); - - assert!(!cc.is_known_empty); - assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); - - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); - - // ?x is bound to datoms0.e. - assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); - - // Our 'where' clauses are two: - // - datoms0.v = 'hello' - // - datoms0.value_type_tag = string - // TODO: implement expand_type_tags. 
- assert_eq!(cc.wheres, vec![ - ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("hello".to_string()))), - ColumnConstraint::HasType("all_datoms00".to_string(), ValueType::String), - ].into()); - } - - #[test] - fn test_apply_two_patterns() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "roz"), 98); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Boolean, - ..Default::default() - }); - add_attribute(&mut schema, 98, Attribute { - value_type: ValueType::String, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "roz")), - value: PatternValuePlace::Constant(NonIntegerConstant::Text("idgoeshere".to_string())), - tx: PatternNonValuePlace::Placeholder, - }); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // Finally, expand column bindings to get the overlaps for ?x. 
- cc.expand_column_bindings(); - - println!("{:#?}", cc); - - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); - let d1_e = QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity); - let d1_a = QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute); - - assert!(!cc.is_known_empty); - assert_eq!(cc.from, vec![ - SourceAlias(DatomsTable::Datoms, "datoms00".to_string()), - SourceAlias(DatomsTable::Datoms, "datoms01".to_string()), - ]); - - // ?x must be a ref. - assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); - - // ?x is bound to datoms0.e and datoms1.e. - assert_eq!(cc.column_bindings.get(&x).unwrap(), - &vec![ - d0_e.clone(), - d1_e.clone(), - ]); - - // Our 'where' clauses are four: - // - datoms0.a = 98 (:foo/roz) - // - datoms0.v = "idgoeshere" - // - datoms1.a = 99 (:foo/bar) - // - datoms1.e = datoms0.e - assert_eq!(cc.wheres, vec![ - ColumnConstraint::Equals(d0_a, QueryValue::Entid(98)), - ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("idgoeshere".to_string()))), - ColumnConstraint::Equals(d1_a, QueryValue::Entid(99)), - ColumnConstraint::Equals(d0_e, QueryValue::Column(d1_e)), - ].into()); - } - - #[test] - fn test_value_bindings() { - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Boolean, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - - let b: BTreeMap = - vec![(y.clone(), TypedValue::Boolean(true))].into_iter().collect(); - let mut cc = ConjoiningClauses::with_value_bindings(b); - - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: 
PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); - - // ?y has been expanded into `true`. - assert_eq!(cc.wheres, vec![ - ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), - ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), - ].into()); - - // There is no binding for ?y. - assert!(!cc.column_bindings.contains_key(&y)); - - // ?x is bound to the entity. - assert_eq!(cc.column_bindings.get(&x).unwrap(), - &vec![d0_e.clone()]); - } - - #[test] - /// Bind a value to a variable in a query where the type of the value disagrees with the type of - /// the variable inferred from known attributes. - fn test_value_bindings_type_disagreement() { - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Boolean, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - - let b: BTreeMap = - vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); - let mut cc = ConjoiningClauses::with_value_bindings(b); - - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // The type of the provided binding doesn't match the type of the attribute. 
- assert!(cc.is_known_empty); - } - - #[test] - /// Bind a non-textual value to a variable in a query where the variable is used as the value - /// of a fulltext-valued attribute. - fn test_fulltext_type_disagreement() { - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::String, - fulltext: true, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - - let b: BTreeMap = - vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); - let mut cc = ConjoiningClauses::with_value_bindings(b); - - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // The type of the provided binding doesn't match the type of the attribute. - assert!(cc.is_known_empty); - } - - #[test] - /// Apply two patterns: a pattern and a numeric predicate. - /// Verify that after application of the predicate we know that the value - /// must be numeric. 
- fn test_apply_numeric_predicate() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Long, - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Placeholder, - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - assert!(!cc.is_known_empty); - - let op = PlainSymbol::new("<"); - let comp = NumericComparison::from_datalog_operator(op.plain_name()).unwrap(); - assert!(cc.apply_numeric_predicate(&schema, comp, Predicate { - operator: op, - args: vec![ - FnArg::Variable(Variable(PlainSymbol::new("?y"))), FnArg::EntidOrInteger(10), - ]}).is_ok()); - - assert!(!cc.is_known_empty); - - // Finally, expand column bindings to get the overlaps for ?x. - cc.expand_column_bindings(); - assert!(!cc.is_known_empty); - - // After processing those two clauses, we know that ?y must be numeric, but not exactly - // which type it must be. - assert_eq!(None, cc.known_type(&y)); // Not just one. 
- let expected: HashSet = vec![ValueType::Double, ValueType::Long].into_iter().collect(); - assert_eq!(Some(&expected), cc.known_types.get(&y)); - - let clauses = cc.wheres; - assert_eq!(clauses.len(), 1); - assert_eq!(clauses.0[0], ColumnConstraint::NumericInequality { - operator: NumericComparison::LessThan, - left: QueryValue::Column(cc.column_bindings.get(&y).unwrap()[0].clone()), - right: QueryValue::TypedValue(TypedValue::Long(10)), - }.into()); - } - - #[test] - /// Apply three patterns: an unbound pattern to establish a value var, - /// a predicate to constrain the val to numeric types, and a third pattern to conflict with the - /// numeric types and cause the pattern to fail. - fn test_apply_conflict_with_numeric_range() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "roz"), 98); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Long, - ..Default::default() - }); - add_attribute(&mut schema, 98, Attribute { - value_type: ValueType::String, - unique: Some(Unique::Identity), - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Placeholder, - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - assert!(!cc.is_known_empty); - - let op = PlainSymbol::new(">="); - let comp = NumericComparison::from_datalog_operator(op.plain_name()).unwrap(); - assert!(cc.apply_numeric_predicate(&schema, comp, Predicate { - operator: op, - args: vec![ - FnArg::Variable(Variable(PlainSymbol::new("?y"))), FnArg::EntidOrInteger(10), - ]}).is_ok()); - - assert!(!cc.is_known_empty); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: 
PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "roz")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // Finally, expand column bindings to get the overlaps for ?x. - cc.expand_column_bindings(); - - assert!(cc.is_known_empty); - assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), - vec![ValueType::Double, ValueType::Long].into_iter() - .collect(), - ValueType::String)); - } - - #[test] - /// Apply two patterns with differently typed attributes, but sharing a variable in the value - /// place. No value can bind to a variable and match both types, so the CC is known to return - /// no results. - fn test_apply_two_conflicting_known_patterns() { - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "roz"), 98); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::Boolean, - ..Default::default() - }); - add_attribute(&mut schema, 98, Attribute { - value_type: ValueType::String, - unique: Some(Unique::Identity), - ..Default::default() - }); - - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "roz")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), - value: PatternValuePlace::Variable(y.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // Finally, expand column bindings to get the overlaps for 
?x. - cc.expand_column_bindings(); - - assert!(cc.is_known_empty); - assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), unit_type_set(ValueType::String), ValueType::Boolean)); - } - - #[test] - #[should_panic(expected = "assertion failed: cc.is_known_empty")] - /// This test needs range inference in order to succeed: we must deduce that ?y must - /// simultaneously be a boolean-valued attribute and a ref-valued attribute, and thus - /// the CC can never return results. - fn test_apply_two_implicitly_conflicting_patterns() { - let mut cc = ConjoiningClauses::default(); - let schema = Schema::default(); - - // [:find ?x :where - // [?x ?y true] - // [?z ?y ?x]] - let x = Variable(PlainSymbol::new("?x")); - let y = Variable(PlainSymbol::new("?y")); - let z = Variable(PlainSymbol::new("?z")); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(x.clone()), - attribute: PatternNonValuePlace::Variable(y.clone()), - value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), - tx: PatternNonValuePlace::Placeholder, - }); - cc.apply_pattern(&schema, Pattern { - source: None, - entity: PatternNonValuePlace::Variable(z.clone()), - attribute: PatternNonValuePlace::Variable(y.clone()), - value: PatternValuePlace::Variable(x.clone()), - tx: PatternNonValuePlace::Placeholder, - }); - - // Finally, expand column bindings to get the overlaps for ?x. 
- cc.expand_column_bindings(); - - assert!(cc.is_known_empty); - assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(x.clone(), unit_type_set(ValueType::Ref), ValueType::Boolean)); - } -} diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs new file mode 100644 index 00000000..732cd70a --- /dev/null +++ b/query-algebrizer/src/clauses/mod.rs @@ -0,0 +1,593 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::fmt::{ + Debug, + Formatter, +}; + +use std::collections::{ + BTreeMap, + BTreeSet, + HashSet, +}; + +use std::collections::btree_map::Entry; + +use mentat_core::{ + Attribute, + Entid, + Schema, + TypedValue, + ValueType, +}; + +use mentat_query::{ + NamespacedKeyword, + NonIntegerConstant, + Pattern, + PatternNonValuePlace, + PatternValuePlace, + Variable, + WhereClause, +}; + +use errors::{ + Result, +}; + +use types::{ + ColumnConstraint, + ColumnIntersection, + DatomsColumn, + DatomsTable, + EmptyBecause, + QualifiedAlias, + QueryValue, + SourceAlias, + TableAlias, +}; + +mod or; +mod pattern; +mod predicate; +mod resolve; + +use validate::validate_or_join; + +/// A thing that's capable of aliasing a table name for us. +/// This exists so that we can obtain predictable names in tests. 
+pub type TableAliaser = Box TableAlias>; + +pub fn default_table_aliaser() -> TableAliaser { + let mut i = -1; + Box::new(move |table| { + i += 1; + format!("{}{:02}", table.name(), i) + }) +} + +fn unit_type_set(t: ValueType) -> HashSet { + let mut s = HashSet::with_capacity(1); + s.insert(t); + s +} + +/// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`. +/// The topmost form in a query is a `ConjoiningClauses`. +/// +/// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`. +/// - Predicate clauses turn into the same, but with other functions. +/// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to +/// bind it to the outer variables, or adds simple `WHERE` clauses to the outer +/// clause. +/// - `not-join` is similar, but with explicit binding. +/// - `or` turns into a collection of `UNION`s inside a subquery, or a simple +/// alternation. +/// `or`'s documentation states that all clauses must include the same vars, +/// but that's an over-simplification: all clauses must refer to the external +/// unification vars. +/// The entire `UNION`-set is `JOIN`ed to any surrounding expressions per the `rule-vars` +/// clause, or the intersection of the vars in the two sides of the `JOIN`. +/// +/// Not yet done: +/// - Function clauses with bindings turn into: +/// * Subqueries. Perhaps less efficient? Certainly clearer. +/// * Projection expressions, if only used for output. +/// * Inline expressions? +///--------------------------------------------------------------------------------------- +pub struct ConjoiningClauses { + /// `true` if this set of clauses cannot yield results in the context of the current schema. + pub is_known_empty: bool, + pub empty_because: Option, + + /// A function used to generate an alias for a table -- e.g., from "datoms" to "datoms123". + aliaser: TableAliaser, + + /// A vector of source/alias pairs used to construct a SQL `FROM` list. 
+ pub from: Vec, + + /// A list of fragments that can be joined by `AND`. + pub wheres: ColumnIntersection, + + /// A map from var to qualified columns. Used to project. + pub column_bindings: BTreeMap>, + + /// A list of variables mentioned in the enclosing query's :in clause. These must all be bound + /// before the query can be executed. TODO: clarify what this means for nested CCs. + pub input_variables: BTreeSet, + + /// In some situations -- e.g., when a query is being run only once -- we know in advance the + /// values bound to some or all variables. These can be substituted directly when the query is + /// algebrized. + /// + /// Value bindings must agree with `known_types`. If you write a query like + /// ```edn + /// [:find ?x :in $ ?val :where [?x :foo/int ?val]] + /// ``` + /// + /// and for `?val` provide `TypedValue::String("foo".to_string())`, the query will be known at + /// algebrizing time to be empty. + value_bindings: BTreeMap, + + /// A map from var to type. Whenever a var maps unambiguously to two different types, it cannot + /// yield results, so we don't represent that case here. If a var isn't present in the map, it + /// means that its type is not known in advance. + pub known_types: BTreeMap>, + + /// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime. + /// If a var isn't present in `known_types`, it should be present here. + extracted_types: BTreeMap, +} + +impl Debug for ConjoiningClauses { + fn fmt(&self, fmt: &mut Formatter) -> ::std::fmt::Result { + fmt.debug_struct("ConjoiningClauses") + .field("is_known_empty", &self.is_known_empty) + .field("from", &self.from) + .field("wheres", &self.wheres) + .field("column_bindings", &self.column_bindings) + .field("input_variables", &self.input_variables) + .field("value_bindings", &self.value_bindings) + .field("known_types", &self.known_types) + .field("extracted_types", &self.extracted_types) + .finish() + } +} + +/// Basics. 
+impl Default for ConjoiningClauses { + fn default() -> ConjoiningClauses { + ConjoiningClauses { + is_known_empty: false, + empty_because: None, + aliaser: default_table_aliaser(), + from: vec![], + wheres: ColumnIntersection::default(), + input_variables: BTreeSet::new(), + column_bindings: BTreeMap::new(), + value_bindings: BTreeMap::new(), + known_types: BTreeMap::new(), + extracted_types: BTreeMap::new(), + } + } +} + +impl ConjoiningClauses { + #[allow(dead_code)] + fn with_value_bindings(bindings: BTreeMap) -> ConjoiningClauses { + let mut cc = ConjoiningClauses { + value_bindings: bindings, + ..Default::default() + }; + + // Pre-fill our type mappings with the types of the input bindings. + cc.known_types + .extend(cc.value_bindings.iter() + .map(|(k, v)| (k.clone(), unit_type_set(v.value_type())))); + cc + } +} + +impl ConjoiningClauses { + fn bound_value(&self, var: &Variable) -> Option { + self.value_bindings.get(var).cloned() + } + + /// Return a single `ValueType` if the given variable is known to have a precise type. + /// Returns `None` if the type of the variable is unknown. + /// Returns `None` if the type of the variable is known but not precise -- "double + /// or integer" isn't good enough. + pub fn known_type(&self, var: &Variable) -> Option { + match self.known_types.get(var) { + Some(types) if types.len() == 1 => types.iter().next().cloned(), + _ => None, + } + } + + pub fn bind_column_to_var(&mut self, schema: &Schema, table: TableAlias, column: DatomsColumn, var: Variable) { + // Do we have an external binding for this? + if let Some(bound_val) = self.bound_value(&var) { + // Great! Use that instead. + // We expect callers to do things like bind keywords here; we need to translate these + // before they hit our constraints. + // TODO: recognize when the valueType might be a ref and also translate entids there. 
+ if column == DatomsColumn::Value { + self.constrain_column_to_constant(table, column, bound_val); + } else { + match bound_val { + TypedValue::Keyword(ref kw) => { + if let Some(entid) = self.entid_for_ident(schema, kw) { + self.constrain_column_to_entity(table, column, entid); + } else { + // Impossible. + // For attributes this shouldn't occur, because we check the binding in + // `table_for_places`/`alias_table`, and if it didn't resolve to a valid + // attribute then we should have already marked the pattern as empty. + self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone())); + } + }, + TypedValue::Ref(entid) => { + self.constrain_column_to_entity(table, column, entid); + }, + _ => { + // One can't bind an e, a, or tx to something other than an entity. + self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val)); + }, + } + } + + return; + } + + // Will we have an external binding for this? + // If so, we don't need to extract its type. We'll know it later. + let late_binding = self.input_variables.contains(&var); + + // If this is a value, and we don't already know its type or where + // to get its type, record that we can get it from this table. + let needs_type_extraction = + !late_binding && // Never need to extract for bound vars. + column == DatomsColumn::Value && // Never need to extract types for refs. + self.known_type(&var).is_none() && // Don't need to extract if we know a single type. + !self.extracted_types.contains_key(&var); // We're already extracting the type. + + let alias = QualifiedAlias(table, column); + + // If we subsequently find out its type, we'll remove this later -- see + // the removal in `constrain_var_to_type`. 
+ if needs_type_extraction { + self.extracted_types.insert(var.clone(), alias.for_type_tag()); + } + self.column_bindings.entry(var).or_insert(vec![]).push(alias); + } + + pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { + self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) + } + + pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) { + self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) + } + + pub fn constrain_attribute(&mut self, table: TableAlias, attribute: Entid) { + self.constrain_column_to_entity(table, DatomsColumn::Attribute, attribute) + } + + pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) { + self.wheres.also(ColumnConstraint::Equals( + QualifiedAlias(table, DatomsColumn::Value), + QueryValue::PrimitiveLong(value))) + } + + /// Mark the given value as one of the set of numeric types. + fn constrain_var_to_numeric(&mut self, variable: Variable) { + let mut numeric_types = HashSet::with_capacity(2); + numeric_types.insert(ValueType::Double); + numeric_types.insert(ValueType::Long); + + let entry = self.known_types.entry(variable); + match entry { + Entry::Vacant(vacant) => { + vacant.insert(numeric_types); + }, + Entry::Occupied(mut occupied) => { + let narrowed: HashSet = numeric_types.intersection(occupied.get()).cloned().collect(); + match narrowed.len() { + 0 => { + // TODO: can't borrow as mutable more than once! + //self.mark_known_empty(EmptyBecause::TypeMismatch(occupied.key().clone(), occupied.get().clone(), ValueType::Double)); // I know… + }, + 1 => { + // Hooray! + self.extracted_types.remove(occupied.key()); + }, + _ => { + }, + }; + occupied.insert(narrowed); + }, + } + } + + /// Constrains the var if there's no existing type. 
+ /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting + /// type already known. + fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { + // If this variable now has a known attribute, we can unhook extracted types for + // any other instances of that variable. + // For example, given + // + // ```edn + // [:find ?v :where [?x ?a ?v] [?y :foo/int ?v]] + // ``` + // + // we will initially choose to extract the type tag for `?v`, but on encountering + // the second pattern we can avoid that. + self.extracted_types.remove(&variable); + + // Is there an existing mapping for this variable? + // Any known inputs have already been added to known_types, and so if they conflict we'll + // spot it here. + if let Some(existing) = self.known_types.insert(variable.clone(), unit_type_set(this_type)) { + // There was an existing mapping. Does this type match? + if !existing.contains(&this_type) { + self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type)); + } + } + } + + /// Ensure that the given place has the correct types to be a tx-id. + /// Right now this is mostly unimplemented: we fail hard if anything but a placeholder is + /// present. + fn constrain_to_tx(&mut self, tx: &PatternNonValuePlace) { + match *tx { + PatternNonValuePlace::Placeholder => (), + _ => unimplemented!(), // TODO + } + } + + /// Ensure that the given place can be an entity, and is congruent with existing types. + /// This is used for `entity` and `attribute` places in a pattern. + fn constrain_to_ref(&mut self, value: &PatternNonValuePlace) { + // If it's a variable, record that it has the right type. + // Ident or attribute resolution errors (the only other check we need to do) will be done + // by the caller. 
+ if let &PatternNonValuePlace::Variable(ref v) = value { + self.constrain_var_to_type(v.clone(), ValueType::Ref) + } + } + + fn mark_known_empty(&mut self, why: EmptyBecause) { + self.is_known_empty = true; + if self.empty_because.is_some() { + return; + } + println!("CC known empty: {:?}.", &why); // TODO: proper logging. + self.empty_because = Some(why); + } + + fn entid_for_ident<'s, 'a>(&self, schema: &'s Schema, ident: &'a NamespacedKeyword) -> Option { + schema.get_entid(&ident) + } + + fn table_for_attribute_and_value<'s, 'a>(&self, attribute: &'s Attribute, value: &'a PatternValuePlace) -> ::std::result::Result { + if attribute.fulltext { + match value { + &PatternValuePlace::Placeholder => + Ok(DatomsTable::Datoms), // We don't need the value. + + // TODO: an existing non-string binding can cause this pattern to fail. + &PatternValuePlace::Variable(_) => + Ok(DatomsTable::AllDatoms), + + &PatternValuePlace::Constant(NonIntegerConstant::Text(_)) => + Ok(DatomsTable::AllDatoms), + + _ => { + // We can't succeed if there's a non-string constant value for a fulltext + // field. + Err(EmptyBecause::NonStringFulltextValue) + }, + } + } else { + Ok(DatomsTable::Datoms) + } + } + + fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a PatternValuePlace) -> ::std::result::Result { + // If the value is known to be non-textual, we can simply use the regular datoms + // table (TODO: and exclude on `index_fulltext`!). + // + // If the value is a placeholder too, then we can walk the non-value-joined view, + // because we don't care about retrieving the fulltext value. + // + // If the value is a variable or string, we must use `all_datoms`, or do the join + // ourselves, because we'll need to either extract or compare on the string. + Ok( + match value { + // TODO: see if the variable is projected, aggregated, or compared elsewhere in + // the query. If it's not, we don't need to use all_datoms here. 
+ &PatternValuePlace::Variable(ref v) => { + // Do we know that this variable can't be a string? If so, we don't need + // AllDatoms. None or String means it could be or definitely is. + match self.known_types.get(v).map(|types| types.contains(&ValueType::String)) { + Some(false) => DatomsTable::Datoms, + _ => DatomsTable::AllDatoms, + } + } + &PatternValuePlace::Constant(NonIntegerConstant::Text(_)) => + DatomsTable::AllDatoms, + _ => + DatomsTable::Datoms, + }) + } + + /// Decide which table to use for the provided attribute and value. + /// If the attribute input or value binding doesn't name an attribute, or doesn't name an + /// attribute that is congruent with the supplied value, we return an `EmptyBecause`. + /// The caller is responsible for marking the CC as known-empty if this is a fatal failure. + fn table_for_places<'s, 'a>(&self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> ::std::result::Result { + match attribute { + &PatternNonValuePlace::Ident(ref kw) => + schema.attribute_for_ident(kw) + .ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.clone())) + .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), + &PatternNonValuePlace::Entid(id) => + schema.attribute_for_entid(id) + .ok_or_else(|| EmptyBecause::InvalidAttributeEntid(id)) + .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), + // TODO: In a prepared context, defer this decision until a second algebrizing phase. + // #278. + &PatternNonValuePlace::Placeholder => + self.table_for_unknown_attribute(value), + &PatternNonValuePlace::Variable(ref v) => { + // See if we have a binding for the variable. + match self.bound_value(v) { + // TODO: In a prepared context, defer this decision until a second algebrizing phase. + // #278. + None => + self.table_for_unknown_attribute(value), + Some(TypedValue::Ref(id)) => + // Recurse: it's easy. 
+ self.table_for_places(schema, &PatternNonValuePlace::Entid(id), value), + Some(TypedValue::Keyword(ref kw)) => + // Don't recurse: avoid needing to clone the keyword. + schema.attribute_for_ident(kw) + .ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.clone())) + .and_then(|attribute| self.table_for_attribute_and_value(attribute, value)), + Some(v) => { + // This pattern cannot match: the caller has bound a non-entity value to an + // attribute place. + Err(EmptyBecause::InvalidBinding(DatomsColumn::Attribute, v.clone())) + }, + } + }, + } + } + + /// Produce a (table, alias) pair to handle the provided pattern. + /// This is a mutating method because it mutates the aliaser function! + /// Note that if this function decides that a pattern cannot match, it will flip + /// `is_known_empty`. + fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option { + self.table_for_places(schema, &pattern.attribute, &pattern.value) + .map_err(|reason| { + self.mark_known_empty(reason); + }) + .map(|table| SourceAlias(table, (self.aliaser)(table))) + .ok() + } + + fn get_attribute<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option<&'s Attribute> { + match pattern.attribute { + PatternNonValuePlace::Entid(id) => + schema.attribute_for_entid(id), + PatternNonValuePlace::Ident(ref kw) => + schema.attribute_for_ident(kw), + _ => + None, + } + } + + fn get_value_type<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option { + self.get_attribute(schema, pattern).map(|x| x.value_type) + } +} + +/// Expansions. +impl ConjoiningClauses { + + /// Take the contents of `column_bindings` and generate inter-constraints for the appropriate + /// columns into `wheres`. 
+ /// + /// For example, a bindings map associating a var to three places in the query, like + /// + /// ```edn + /// {?foo [datoms12.e datoms13.v datoms14.e]} + /// ``` + /// + /// produces two additional constraints: + /// + /// ```example + /// datoms12.e = datoms13.v + /// datoms12.e = datoms14.e + /// ``` + pub fn expand_column_bindings(&mut self) { + for cols in self.column_bindings.values() { + if cols.len() > 1 { + let ref primary = cols[0]; + let secondaries = cols.iter().skip(1); + for secondary in secondaries { + // TODO: if both primary and secondary are .v, should we make sure + // the type tag columns also match? + // We don't do so in the ClojureScript version. + self.wheres.also(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone()))); + } + } + } + } + + /// When a CC has accumulated all patterns, generate value_type_tag entries in `wheres` + /// to refine value types for which two things are true: + /// + /// - There are two or more different types with the same SQLite representation. E.g., + /// ValueType::Boolean shares a representation with Integer and Ref. + /// - There is no attribute constraint present in the CC. + /// + /// It's possible at this point for the space of acceptable type tags to not intersect: e.g., + /// for the query + /// + /// ```edn + /// [:find ?x :where + /// [?x ?y true] + /// [?z ?y ?x]] + /// ``` + /// + /// where `?y` must simultaneously be a ref-typed attribute and a boolean-typed attribute. This + /// function deduces that and calls `self.mark_known_empty`. #293. + pub fn expand_type_tags(&mut self) { + // TODO. + } +} + +impl ConjoiningClauses { + // This is here, rather than in `lib.rs`, because it's recursive: `or` can contain `or`, + // and so on. 
+ pub fn apply_clause(&mut self, schema: &Schema, where_clause: WhereClause) -> Result<()> { + match where_clause { + WhereClause::Pattern(p) => { + self.apply_pattern(schema, p); + Ok(()) + }, + WhereClause::Pred(p) => { + self.apply_predicate(schema, p) + }, + WhereClause::OrJoin(o) => { + validate_or_join(&o) + //?; + //self.apply_or_join(schema, o) + }, + _ => unimplemented!(), + } + } +} + +// These are helpers that tests use to build Schema instances. +#[cfg(test)] +fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { + schema.entid_map.insert(e, i.clone()); + schema.ident_map.insert(i.clone(), e); +} + +#[cfg(test)] +fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { + schema.schema_map.insert(e, a); +} \ No newline at end of file diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs new file mode 100644 index 00000000..d061d48d --- /dev/null +++ b/query-algebrizer/src/clauses/or.rs @@ -0,0 +1,80 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// WIP +#![allow(dead_code, unused_imports, unused_variables)] + +use mentat_core::{ + Entid, + Schema, + TypedValue, + ValueType, +}; + +use mentat_query::{ + NonIntegerConstant, + Pattern, + PatternValuePlace, + PatternNonValuePlace, + PlainSymbol, + Predicate, + SrcVar, +}; + +use clauses::ConjoiningClauses; + +use errors::{ + Result, + Error, + ErrorKind, +}; + +use types::{ + ColumnConstraint, + ColumnIntersection, + DatomsColumn, + DatomsTable, + EmptyBecause, + NumericComparison, + OrJoinKind, + QualifiedAlias, + QueryValue, + SourceAlias, + TableAlias, +}; + + + + +/// Return true if both left and right are the same variable or both are non-variable. +fn _simply_matches_place(left: &PatternNonValuePlace, right: &PatternNonValuePlace) -> bool { + match (left, right) { + (&PatternNonValuePlace::Variable(ref a), &PatternNonValuePlace::Variable(ref b)) => a == b, + (&PatternNonValuePlace::Placeholder, &PatternNonValuePlace::Placeholder) => true, + (&PatternNonValuePlace::Entid(_), &PatternNonValuePlace::Entid(_)) => true, + (&PatternNonValuePlace::Entid(_), &PatternNonValuePlace::Ident(_)) => true, + (&PatternNonValuePlace::Ident(_), &PatternNonValuePlace::Ident(_)) => true, + (&PatternNonValuePlace::Ident(_), &PatternNonValuePlace::Entid(_)) => true, + _ => false, + } +} + +/// Return true if both left and right are the same variable or both are non-variable. 
+fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePlace) -> bool { + match (left, right) { + (&PatternValuePlace::Variable(ref a), &PatternValuePlace::Variable(ref b)) => a == b, + (&PatternValuePlace::Placeholder, &PatternValuePlace::Placeholder) => true, + (&PatternValuePlace::Variable(_), _) => false, + (_, &PatternValuePlace::Variable(_)) => false, + (&PatternValuePlace::Placeholder, _) => false, + (_, &PatternValuePlace::Placeholder) => false, + _ => true, + } +} diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs new file mode 100644 index 00000000..cc7c901d --- /dev/null +++ b/query-algebrizer/src/clauses/pattern.rs @@ -0,0 +1,814 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use mentat_core::{ + Schema, + TypedValue, + ValueType, +}; + +use mentat_query::{ + Pattern, + PatternValuePlace, + PatternNonValuePlace, + SrcVar, +}; + +use clauses::ConjoiningClauses; + +use types::{ + ColumnConstraint, + DatomsColumn, + EmptyBecause, + SourceAlias, +}; + +/// Application of patterns. +impl ConjoiningClauses { + + /// Apply the constraints in the provided pattern to this CC. + /// + /// This is a single-pass process, which means it is naturally incomplete, failing to take into + /// account all information spread across two patterns. 
+ /// + /// If the constraints cannot be satisfied -- for example, if this pattern includes a numeric + /// attribute and a string value -- then the `is_known_empty` field on the CC is flipped and + /// the function returns. + /// + /// A pattern being impossible to satisfy isn't necessarily a bad thing -- this query might + /// have branched clauses that apply to different knowledge bases, and might refer to + /// vocabulary that isn't (yet) used in this one. + /// + /// Most of the work done by this function depends on the schema and ident maps in the DB. If + /// these change, then any work done is invalid. + /// + /// There's a lot more we can do here and later by examining the + /// attribute: + /// + /// - If it's unique, and we have patterns like + /// + /// [?x :foo/unique 5] [?x :foo/unique ?y] + /// + /// then we can either prove impossibility (the two objects must + /// be equal) or deduce identity and simplify the query. + /// + /// - The same, if it's cardinality-one and the entity is known. + /// + /// - If there's a value index on this attribute, we might want to + /// run this pattern early in the query. + /// + /// - A unique-valued attribute can sometimes be rewritten into an + /// existence subquery instead of a join. + fn apply_pattern_clause_for_alias<'s>(&mut self, schema: &'s Schema, pattern: &Pattern, alias: &SourceAlias) { + if self.is_known_empty { + return; + } + + // Process each place in turn, applying constraints. + // Both `e` and `a` must be entities, which is equivalent here + // to being typed as Ref. + // Sorry for the duplication; Rust makes it a pain to abstract this. + + // The transaction part of a pattern must be an entid, variable, or placeholder. 
+ self.constrain_to_tx(&pattern.tx); + self.constrain_to_ref(&pattern.entity); + self.constrain_to_ref(&pattern.attribute); + + let ref col = alias.1; + + match pattern.entity { + PatternNonValuePlace::Placeholder => + // Placeholders don't contribute any column bindings, nor do + // they constrain the query -- there's no need to produce + // IS NOT NULL, because we don't store nulls in our schema. + (), + PatternNonValuePlace::Variable(ref v) => + self.bind_column_to_var(schema, col.clone(), DatomsColumn::Entity, v.clone()), + PatternNonValuePlace::Entid(entid) => + self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid), + PatternNonValuePlace::Ident(ref ident) => { + if let Some(entid) = self.entid_for_ident(schema, ident) { + self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid) + } else { + // A resolution failure means we're done here. + self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone())); + return; + } + } + } + + match pattern.attribute { + PatternNonValuePlace::Placeholder => + (), + PatternNonValuePlace::Variable(ref v) => + self.bind_column_to_var(schema, col.clone(), DatomsColumn::Attribute, v.clone()), + PatternNonValuePlace::Entid(entid) => { + if !schema.is_attribute(entid) { + // Furthermore, that entid must resolve to an attribute. If it doesn't, this + // query is meaningless. + self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(entid)); + return; + } + self.constrain_attribute(col.clone(), entid) + }, + PatternNonValuePlace::Ident(ref ident) => { + if let Some(entid) = self.entid_for_ident(schema, ident) { + self.constrain_attribute(col.clone(), entid); + + if !schema.is_attribute(entid) { + self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(ident.clone())); + return; + } + } else { + // A resolution failure means we're done here. + self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone())); + return; + } + } + } + + // Determine if the pattern's value type is known. 
+ // We do so by examining the value place and the attribute. + // At this point it's possible that the type of the value is + // inconsistent with the attribute; in that case this pattern + // cannot return results, and we short-circuit. + let value_type = self.get_value_type(schema, pattern); + + match pattern.value { + PatternValuePlace::Placeholder => + (), + + PatternValuePlace::Variable(ref v) => { + if let Some(this_type) = value_type { + // Wouldn't it be nice if we didn't need to clone in the found case? + // It doesn't matter too much: collisons won't be too frequent. + self.constrain_var_to_type(v.clone(), this_type); + if self.is_known_empty { + return; + } + } + + self.bind_column_to_var(schema, col.clone(), DatomsColumn::Value, v.clone()); + }, + PatternValuePlace::EntidOrInteger(i) => + // If we know the valueType, then we can determine whether this is an entid or an + // integer. If we don't, then we must generate a more general query with a + // value_type_tag. + if let Some(ValueType::Ref) = value_type { + self.constrain_column_to_entity(col.clone(), DatomsColumn::Value, i); + } else { + // If we have a pattern like: + // + // `[123 ?a 1]` + // + // then `1` could be an entid (ref), a long, a boolean, or an instant. + // + // We represent these constraints during execution: + // + // - Constraining the value column to the plain numeric value '1'. + // - Constraining its type column to one of a set of types. + // + self.constrain_value_to_numeric(col.clone(), i); + }, + PatternValuePlace::IdentOrKeyword(ref kw) => { + // If we know the valueType, then we can determine whether this is an ident or a + // keyword. If we don't, then we must generate a more general query with a + // value_type_tag. + // We can also speculatively try to resolve it as an ident; if we fail, then we + // know it can only return results if treated as a keyword, and we can treat it as + // such. 
+ if let Some(ValueType::Ref) = value_type { + if let Some(entid) = self.entid_for_ident(schema, kw) { + self.constrain_column_to_entity(col.clone(), DatomsColumn::Value, entid) + } else { + // A resolution failure means we're done here: this attribute must have an + // entity value. + self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone())); + return; + } + } else { + // It must be a keyword. + self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, TypedValue::Keyword(kw.clone())); + self.wheres.also(ColumnConstraint::HasType(col.clone(), ValueType::Keyword)); + }; + }, + PatternValuePlace::Constant(ref c) => { + // TODO: don't allocate. + let typed_value = c.clone().into_typed_value(); + if !typed_value.is_congruent_with(value_type) { + // If the attribute and its value don't match, the pattern must fail. + // We can never have a congruence failure if `value_type` is `None`, so we + // forcibly unwrap here. + let value_type = value_type.expect("Congruence failure but couldn't unwrap"); + let why = EmptyBecause::ValueTypeMismatch(value_type, typed_value); + self.mark_known_empty(why); + return; + } + + // TODO: if we don't know the type of the attribute because we don't know the + // attribute, we can actually work backwards to the set of appropriate attributes + // from the type of the value itself! #292. + let typed_value_type = typed_value.value_type(); + self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, typed_value); + + // If we can't already determine the range of values in the DB from the attribute, + // then we must also constrain the type tag. + // + // Input values might be: + // + // - A long. This is handled by EntidOrInteger. + // - A boolean. This is unambiguous. + // - A double. This is currently unambiguous, though note that SQLite will equate 5.0 with 5. + // - A string. This is unambiguous. + // - A keyword. This is unambiguous. 
+ // + // Because everything we handle here is unambiguous, we generate a single type + // restriction from the value type of the typed value. + if value_type.is_none() { + self.wheres.also(ColumnConstraint::HasType(col.clone(), typed_value_type)); + } + + }, + } + + } + + pub fn apply_pattern<'s, 'p>(&mut self, schema: &'s Schema, pattern: Pattern) { + // For now we only support the default source. + match pattern.source { + Some(SrcVar::DefaultSrc) | None => (), + _ => unimplemented!(), + }; + + if let Some(alias) = self.alias_table(schema, &pattern) { + self.apply_pattern_clause_for_alias(schema, &pattern, &alias); + self.from.push(alias); + } else { + // We didn't determine a table, likely because there was a mismatch + // between an attribute and a value. + // We know we cannot return a result, so we short-circuit here. + self.mark_known_empty(EmptyBecause::AttributeLookupFailed); + return; + } + } +} + +#[cfg(test)] +mod testing { + use super::*; + + use std::collections::BTreeMap; + + use mentat_core::attribute::Unique; + use mentat_core::{ + Attribute, + }; + + use mentat_query::{ + NamespacedKeyword, + NonIntegerConstant, + PlainSymbol, + Variable, + }; + + use clauses::{ + add_attribute, + associate_ident, + unit_type_set, + }; + + use types::{ + ColumnConstraint, + DatomsTable, + QualifiedAlias, + QueryValue, + SourceAlias, + }; + + #[test] + fn test_unknown_ident() { + let mut cc = ConjoiningClauses::default(); + let schema = Schema::default(); + + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), + tx: PatternNonValuePlace::Placeholder, + }); + + assert!(cc.is_known_empty); + } + + #[test] + fn test_unknown_attribute() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + 
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), + tx: PatternNonValuePlace::Placeholder, + }); + + assert!(cc.is_known_empty); + } + + #[test] + fn test_apply_simple_pattern() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), + tx: PatternNonValuePlace::Placeholder, + }); + + // println!("{:#?}", cc); + + let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + + // After this, we know a lot of things: + assert!(!cc.is_known_empty); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); + + // ?x must be a ref. + assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + + // ?x is bound to datoms0.e. + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + + // Our 'where' clauses are two: + // - datoms0.a = 99 + // - datoms0.v = true + // No need for a type tag constraint, because the attribute is known. 
+ assert_eq!(cc.wheres, vec![ + ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), + ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), + ].into()); + } + + #[test] + fn test_apply_unattributed_pattern() { + let mut cc = ConjoiningClauses::default(); + let schema = Schema::default(); + + let x = Variable(PlainSymbol::new("?x")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), + tx: PatternNonValuePlace::Placeholder, + }); + + // println!("{:#?}", cc); + + let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); + let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + + assert!(!cc.is_known_empty); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); + + // ?x must be a ref. + assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + + // ?x is bound to datoms0.e. + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + + // Our 'where' clauses are two: + // - datoms0.v = true + // - datoms0.value_type_tag = boolean + // TODO: implement expand_type_tags. + assert_eq!(cc.wheres, vec![ + ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), + ColumnConstraint::HasType("datoms00".to_string(), ValueType::Boolean), + ].into()); + } + + /// This test ensures that we do less work if we know the attribute thanks to a var lookup. 
+ #[test] + fn test_apply_unattributed_but_bound_pattern_with_returned() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let a = Variable(PlainSymbol::new("?a")); + let v = Variable(PlainSymbol::new("?v")); + + cc.input_variables.insert(a.clone()); + cc.value_bindings.insert(a.clone(), TypedValue::Keyword(NamespacedKeyword::new("foo", "bar"))); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Variable(a.clone()), + value: PatternValuePlace::Variable(v.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // println!("{:#?}", cc); + + let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); + + assert!(!cc.is_known_empty); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); + + // ?x must be a ref. + assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + + // ?x is bound to datoms0.e. + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + assert_eq!(cc.wheres, vec![ + ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), + ].into()); + } + + /// Queries that bind non-entity values to entity places can't return results. 
+ #[test] + fn test_bind_the_wrong_thing() { + let mut cc = ConjoiningClauses::default(); + let schema = Schema::default(); + + let x = Variable(PlainSymbol::new("?x")); + let a = Variable(PlainSymbol::new("?a")); + let v = Variable(PlainSymbol::new("?v")); + let hello = TypedValue::String("hello".to_string()); + + cc.input_variables.insert(a.clone()); + cc.value_bindings.insert(a.clone(), hello.clone()); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Variable(a.clone()), + value: PatternValuePlace::Variable(v.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + assert!(cc.is_known_empty); + assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(DatomsColumn::Attribute, hello)); + } + + + /// This test ensures that we query all_datoms if we're possibly retrieving a string. + #[test] + fn test_apply_unattributed_pattern_with_returned() { + let mut cc = ConjoiningClauses::default(); + let schema = Schema::default(); + + let x = Variable(PlainSymbol::new("?x")); + let a = Variable(PlainSymbol::new("?a")); + let v = Variable(PlainSymbol::new("?v")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Variable(a.clone()), + value: PatternValuePlace::Variable(v.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // println!("{:#?}", cc); + + let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); + + assert!(!cc.is_known_empty); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); + + // ?x must be a ref. + assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + + // ?x is bound to datoms0.e. + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + assert_eq!(cc.wheres, vec![].into()); + } + + /// This test ensures that we query all_datoms if we're looking for a string. 
+ #[test] + fn test_apply_unattributed_pattern_with_string_value() { + let mut cc = ConjoiningClauses::default(); + let schema = Schema::default(); + + let x = Variable(PlainSymbol::new("?x")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::Constant(NonIntegerConstant::Text("hello".to_string())), + tx: PatternNonValuePlace::Placeholder, + }); + + // println!("{:#?}", cc); + + let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); + let d0_v = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Value); + + assert!(!cc.is_known_empty); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); + + // ?x must be a ref. + assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + + // ?x is bound to datoms0.e. + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + + // Our 'where' clauses are two: + // - datoms0.v = 'hello' + // - datoms0.value_type_tag = string + // TODO: implement expand_type_tags. 
+ assert_eq!(cc.wheres, vec![ + ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("hello".to_string()))), + ColumnConstraint::HasType("all_datoms00".to_string(), ValueType::String), + ].into()); + } + + #[test] + fn test_apply_two_patterns() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "roz"), 98); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + add_attribute(&mut schema, 98, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "roz")), + value: PatternValuePlace::Constant(NonIntegerConstant::Text("idgoeshere".to_string())), + tx: PatternNonValuePlace::Placeholder, + }); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // Finally, expand column bindings to get the overlaps for ?x. 
+ cc.expand_column_bindings(); + + println!("{:#?}", cc); + + let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + let d1_e = QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity); + let d1_a = QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute); + + assert!(!cc.is_known_empty); + assert_eq!(cc.from, vec![ + SourceAlias(DatomsTable::Datoms, "datoms00".to_string()), + SourceAlias(DatomsTable::Datoms, "datoms01".to_string()), + ]); + + // ?x must be a ref. + assert_eq!(cc.known_type(&x).unwrap(), ValueType::Ref); + + // ?x is bound to datoms0.e and datoms1.e. + assert_eq!(cc.column_bindings.get(&x).unwrap(), + &vec![ + d0_e.clone(), + d1_e.clone(), + ]); + + // Our 'where' clauses are four: + // - datoms0.a = 98 (:foo/roz) + // - datoms0.v = "idgoeshere" + // - datoms1.a = 99 (:foo/bar) + // - datoms1.e = datoms0.e + assert_eq!(cc.wheres, vec![ + ColumnConstraint::Equals(d0_a, QueryValue::Entid(98)), + ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("idgoeshere".to_string()))), + ColumnConstraint::Equals(d1_a, QueryValue::Entid(99)), + ColumnConstraint::Equals(d0_e, QueryValue::Column(d1_e)), + ].into()); + } + + #[test] + fn test_value_bindings() { + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + + let b: BTreeMap = + vec![(y.clone(), TypedValue::Boolean(true))].into_iter().collect(); + let mut cc = ConjoiningClauses::with_value_bindings(b); + + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: 
PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + + // ?y has been expanded into `true`. + assert_eq!(cc.wheres, vec![ + ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)), + ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))), + ].into()); + + // There is no binding for ?y. + assert!(!cc.column_bindings.contains_key(&y)); + + // ?x is bound to the entity. + assert_eq!(cc.column_bindings.get(&x).unwrap(), + &vec![d0_e.clone()]); + } + + #[test] + /// Bind a value to a variable in a query where the type of the value disagrees with the type of + /// the variable inferred from known attributes. + fn test_value_bindings_type_disagreement() { + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + + let b: BTreeMap = + vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); + let mut cc = ConjoiningClauses::with_value_bindings(b); + + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // The type of the provided binding doesn't match the type of the attribute. 
+ assert!(cc.is_known_empty); + } + + #[test] + /// Bind a non-textual value to a variable in a query where the variable is used as the value + /// of a fulltext-valued attribute. + fn test_fulltext_type_disagreement() { + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::String, + fulltext: true, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + + let b: BTreeMap = + vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); + let mut cc = ConjoiningClauses::with_value_bindings(b); + + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // The type of the provided binding doesn't match the type of the attribute. + assert!(cc.is_known_empty); + } + + #[test] + /// Apply two patterns with differently typed attributes, but sharing a variable in the value + /// place. No value can bind to a variable and match both types, so the CC is known to return + /// no results. 
+ fn test_apply_two_conflicting_known_patterns() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "roz"), 98); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + add_attribute(&mut schema, 98, Attribute { + value_type: ValueType::String, + unique: Some(Unique::Identity), + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "roz")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // Finally, expand column bindings to get the overlaps for ?x. + cc.expand_column_bindings(); + + assert!(cc.is_known_empty); + assert_eq!(cc.empty_because.unwrap(), + EmptyBecause::TypeMismatch(y.clone(), unit_type_set(ValueType::String), ValueType::Boolean)); + } + + #[test] + #[should_panic(expected = "assertion failed: cc.is_known_empty")] + /// This test needs range inference in order to succeed: we must deduce that ?y must + /// simultaneously be a boolean-valued attribute and a ref-valued attribute, and thus + /// the CC can never return results. 
+ fn test_apply_two_implicitly_conflicting_patterns() { + let mut cc = ConjoiningClauses::default(); + let schema = Schema::default(); + + // [:find ?x :where + // [?x ?y true] + // [?z ?y ?x]] + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + let z = Variable(PlainSymbol::new("?z")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Variable(y.clone()), + value: PatternValuePlace::Constant(NonIntegerConstant::Boolean(true)), + tx: PatternNonValuePlace::Placeholder, + }); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(z.clone()), + attribute: PatternNonValuePlace::Variable(y.clone()), + value: PatternValuePlace::Variable(x.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // Finally, expand column bindings to get the overlaps for ?x. + cc.expand_column_bindings(); + + assert!(cc.is_known_empty); + assert_eq!(cc.empty_because.unwrap(), + EmptyBecause::TypeMismatch(x.clone(), unit_type_set(ValueType::Ref), ValueType::Boolean)); + } +} \ No newline at end of file diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs new file mode 100644 index 00000000..87fe6e2e --- /dev/null +++ b/query-algebrizer/src/clauses/predicate.rs @@ -0,0 +1,233 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +use mentat_core::{ + Schema, +}; + +use mentat_query::{ + Predicate, +}; + +use clauses::ConjoiningClauses; + +use errors::{ + Result, + ErrorKind, +}; + +use types::{ + ColumnConstraint, + NumericComparison, +}; + +/// Application of predicates. +impl ConjoiningClauses { + /// There are several kinds of predicates/functions in our Datalog: + /// - A limited set of binary comparison operators: < > <= >= !=. + /// These are converted into SQLite binary comparisons and some type constraints. + /// - A set of predicates like `fulltext` and `get-else` that are translated into + /// SQL `MATCH`es or joins, yielding bindings. + /// - In the future, some predicates that are implemented via function calls in SQLite. + /// + /// At present we have implemented only the five built-in comparison binary operators. + pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { + // Because we'll be growing the set of built-in predicates, handling each differently, + // and ultimately allowing user-specified predicates, we match on the predicate name first. + if let Some(op) = NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) { + self.apply_numeric_predicate(schema, op, predicate) + } else { + bail!(ErrorKind::UnknownFunction(predicate.operator.clone())) + } + } + + /// This function: + /// - Resolves variables and converts types to those more amenable to SQL. + /// - Ensures that the predicate functions name a known operator. + /// - Accumulates a `NumericInequality` constraint into the `wheres` list. + #[allow(unused_variables)] + pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { + if predicate.args.len() != 2 { + bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), predicate.args.len(), 2)); + } + + // Go from arguments -- parser output -- to columns or values. 
+ // Any variables that aren't bound by this point in the linear processing of clauses will + // cause the application of the predicate to fail. + let mut args = predicate.args.into_iter(); + let left = self.resolve_numeric_argument(&predicate.operator, 0, args.next().unwrap())?; + let right = self.resolve_numeric_argument(&predicate.operator, 1, args.next().unwrap())?; + + // These arguments must be variables or numeric constants. + // TODO: generalize argument resolution and validation for different kinds of predicates: + // as we process `(< ?x 5)` we are able to check or deduce that `?x` is numeric, and either + // simplify the pattern or optimize the rest of the query. + // To do so needs a slightly more sophisticated representation of type constraints — a set, + // not a single `Option`. + + // TODO: static evaluation. #383. + let constraint = ColumnConstraint::NumericInequality { + operator: comparison, + left: left, + right: right, + }; + self.wheres.also(constraint); + Ok(()) + } +} + +#[cfg(test)] +mod testing { + use super::*; + + use std::collections::HashSet; + + use mentat_core::attribute::Unique; + use mentat_core::{ + Attribute, + TypedValue, + ValueType, + }; + + use mentat_query::{ + FnArg, + NamespacedKeyword, + Pattern, + PatternNonValuePlace, + PatternValuePlace, + PlainSymbol, + Variable, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + use types::{ + ColumnConstraint, + EmptyBecause, + QueryValue, + }; + + + #[test] + /// Apply two patterns: a pattern and a numeric predicate. + /// Verify that after application of the predicate we know that the value + /// must be numeric. 
+ fn test_apply_numeric_predicate() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Long, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + assert!(!cc.is_known_empty); + + let op = PlainSymbol::new("<"); + let comp = NumericComparison::from_datalog_operator(op.plain_name()).unwrap(); + assert!(cc.apply_numeric_predicate(&schema, comp, Predicate { + operator: op, + args: vec![ + FnArg::Variable(Variable(PlainSymbol::new("?y"))), FnArg::EntidOrInteger(10), + ]}).is_ok()); + + assert!(!cc.is_known_empty); + + // Finally, expand column bindings to get the overlaps for ?x. + cc.expand_column_bindings(); + assert!(!cc.is_known_empty); + + // After processing those two clauses, we know that ?y must be numeric, but not exactly + // which type it must be. + assert_eq!(None, cc.known_type(&y)); // Not just one. 
+ let expected: HashSet = vec![ValueType::Double, ValueType::Long].into_iter().collect(); + assert_eq!(Some(&expected), cc.known_types.get(&y)); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 1); + assert_eq!(clauses.0[0], ColumnConstraint::NumericInequality { + operator: NumericComparison::LessThan, + left: QueryValue::Column(cc.column_bindings.get(&y).unwrap()[0].clone()), + right: QueryValue::TypedValue(TypedValue::Long(10)), + }.into()); + } + + #[test] + /// Apply three patterns: an unbound pattern to establish a value var, + /// a predicate to constrain the val to numeric types, and a third pattern to conflict with the + /// numeric types and cause the pattern to fail. + fn test_apply_conflict_with_numeric_range() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "roz"), 98); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Long, + ..Default::default() + }); + add_attribute(&mut schema, 98, Attribute { + value_type: ValueType::String, + unique: Some(Unique::Identity), + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + assert!(!cc.is_known_empty); + + let op = PlainSymbol::new(">="); + let comp = NumericComparison::from_datalog_operator(op.plain_name()).unwrap(); + assert!(cc.apply_numeric_predicate(&schema, comp, Predicate { + operator: op, + args: vec![ + FnArg::Variable(Variable(PlainSymbol::new("?y"))), FnArg::EntidOrInteger(10), + ]}).is_ok()); + + assert!(!cc.is_known_empty); + cc.apply_pattern(&schema, Pattern { + source: None, + entity: 
PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "roz")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // Finally, expand column bindings to get the overlaps for ?x. + cc.expand_column_bindings(); + + assert!(cc.is_known_empty); + assert_eq!(cc.empty_because.unwrap(), + EmptyBecause::TypeMismatch(y.clone(), + vec![ValueType::Double, ValueType::Long].into_iter() + .collect(), + ValueType::String)); + } +} \ No newline at end of file diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs new file mode 100644 index 00000000..ff379740 --- /dev/null +++ b/query-algebrizer/src/clauses/resolve.rs @@ -0,0 +1,87 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use mentat_core::{ + TypedValue, +}; + +use mentat_query::{ + FnArg, + NonIntegerConstant, + PlainSymbol, +}; + +use clauses::ConjoiningClauses; + +use errors::{ + Result, + Error, + ErrorKind, +}; + +use types::{ + EmptyBecause, + QueryValue, +}; + +/// Argument resolution. +impl ConjoiningClauses { + /// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete + /// constraint. + /// Additionally, do two things: + /// - Mark the pattern as known-empty if any argument is known non-numeric. + /// - Mark any variables encountered as numeric. 
+ pub fn resolve_numeric_argument(&mut self, function: &PlainSymbol, position: usize, arg: FnArg) -> Result { + use self::FnArg::*; + match arg { + FnArg::Variable(var) => { + self.constrain_var_to_numeric(var.clone()); + self.column_bindings + .get(&var) + .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) + .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var))) + }, + // Can't be an entid. + EntidOrInteger(i) => Ok(QueryValue::TypedValue(TypedValue::Long(i))), + Ident(_) | + SrcVar(_) | + Constant(NonIntegerConstant::Boolean(_)) | + Constant(NonIntegerConstant::Text(_)) | + Constant(NonIntegerConstant::BigInteger(_)) => { + self.mark_known_empty(EmptyBecause::NonNumericArgument); + bail!(ErrorKind::NonNumericArgument(function.clone(), position)); + }, + Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), + } + } + + + /// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete + /// constraint. 
+ #[allow(dead_code)] + fn resolve_argument(&self, arg: FnArg) -> Result { + use self::FnArg::*; + match arg { + FnArg::Variable(var) => { + self.column_bindings + .get(&var) + .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) + .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var))) + }, + EntidOrInteger(i) => Ok(QueryValue::PrimitiveLong(i)), + Ident(_) => unimplemented!(), // TODO + Constant(NonIntegerConstant::Boolean(val)) => Ok(QueryValue::TypedValue(TypedValue::Boolean(val))), + Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), + Constant(NonIntegerConstant::Text(s)) => Ok(QueryValue::TypedValue(TypedValue::String(s.clone()))), + Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), + SrcVar(_) => unimplemented!(), + } + } +} diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 51832d26..a0e90db3 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -17,7 +17,8 @@ extern crate mentat_query; mod errors; mod types; mod validate; -mod cc; +mod clauses; + use mentat_core::{ Schema, @@ -27,7 +28,6 @@ use mentat_query::{ FindQuery, FindSpec, SrcVar, - WhereClause, }; pub use errors::{ @@ -42,7 +42,7 @@ pub struct AlgebraicQuery { pub find_spec: FindSpec, has_aggregates: bool, pub limit: Option, - pub cc: cc::ConjoiningClauses, + pub cc: clauses::ConjoiningClauses, } impl AlgebraicQuery { @@ -72,7 +72,7 @@ impl AlgebraicQuery { pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { // TODO: integrate default source into pattern processing. // TODO: flesh out the rest of find-into-context. 
- let mut cc = cc::ConjoiningClauses::default(); + let mut cc = clauses::ConjoiningClauses::default(); let where_clauses = parsed.where_clauses; for where_clause in where_clauses { cc.apply_clause(schema, where_clause)?; @@ -88,7 +88,7 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { }) } -pub use cc::{ +pub use clauses::{ ConjoiningClauses, }; diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 6214fa19..457aa2ab 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -309,4 +309,41 @@ pub enum EmptyBecause { InvalidBinding(DatomsColumn, TypedValue), ValueTypeMismatch(ValueType, TypedValue), AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO +} + +impl Debug for EmptyBecause { + fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { + use self::EmptyBecause::*; + match self { + &TypeMismatch(ref var, ref existing, ref desired) => { + write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", + var, desired, existing) + }, + &NonNumericArgument => { + write!(f, "Non-numeric argument in numeric place") + }, + &NonStringFulltextValue => { + write!(f, "Non-string argument for fulltext attribute") + }, + &UnresolvedIdent(ref kw) => { + write!(f, "Couldn't resolve keyword {}", kw) + }, + &InvalidAttributeIdent(ref kw) => { + write!(f, "{} does not name an attribute", kw) + }, + &InvalidAttributeEntid(entid) => { + write!(f, "{} is not an attribute", entid) + }, + &InvalidBinding(ref column, ref tv) => { + write!(f, "{:?} cannot name column {:?}", tv, column) + }, + &ValueTypeMismatch(value_type, ref typed_value) => { + write!(f, "Type mismatch: {:?} doesn't match attribute type {:?}", + typed_value, value_type) + }, + &AttributeLookupFailed => { + write!(f, "Attribute lookup failed") + }, + } + } } \ No newline at end of file diff --git a/query-algebrizer/src/validate.rs b/query-algebrizer/src/validate.rs index 5fea33c1..4eb56662 100644 --- 
a/query-algebrizer/src/validate.rs +++ b/query-algebrizer/src/validate.rs @@ -14,7 +14,6 @@ use mentat_query::{ ContainsVariables, OrJoin, Variable, - WhereClause, UnifyVars, }; @@ -89,7 +88,6 @@ mod tests { PatternNonValuePlace, PatternValuePlace, PlainSymbol, - SrcVar, UnifyVars, Variable, WhereClause, From 74f188df9b638aff2ae02a8e434d746ba5cd5790 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Thu, 30 Mar 2017 18:16:04 -0700 Subject: [PATCH 10/11] Part 5b: rename also/instead to add_intersection and add_alternate. --- query-algebrizer/src/clauses/mod.rs | 8 ++++---- query-algebrizer/src/clauses/pattern.rs | 6 +++--- query-algebrizer/src/clauses/predicate.rs | 2 +- query-algebrizer/src/types.rs | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 732cd70a..f9ee4edc 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -270,11 +270,11 @@ impl ConjoiningClauses { } pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { - self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) + self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) } pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) { - self.wheres.also(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) + self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) } pub fn constrain_attribute(&mut self, table: TableAlias, attribute: Entid) { @@ -282,7 +282,7 @@ impl ConjoiningClauses { } pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) { - self.wheres.also(ColumnConstraint::Equals( + self.wheres.add_intersection(ColumnConstraint::Equals( 
QualifiedAlias(table, DatomsColumn::Value), QueryValue::PrimitiveLong(value))) } @@ -529,7 +529,7 @@ impl ConjoiningClauses { // TODO: if both primary and secondary are .v, should we make sure // the type tag columns also match? // We don't do so in the ClojureScript version. - self.wheres.also(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone()))); + self.wheres.add_intersection(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone()))); } } } diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index cc7c901d..0005e794 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -196,7 +196,7 @@ impl ConjoiningClauses { } else { // It must be a keyword. self.constrain_column_to_constant(col.clone(), DatomsColumn::Value, TypedValue::Keyword(kw.clone())); - self.wheres.also(ColumnConstraint::HasType(col.clone(), ValueType::Keyword)); + self.wheres.add_intersection(ColumnConstraint::HasType(col.clone(), ValueType::Keyword)); }; }, PatternValuePlace::Constant(ref c) => { @@ -232,7 +232,7 @@ impl ConjoiningClauses { // Because everything we handle here is unambiguous, we generate a single type // restriction from the value type of the typed value. 
if value_type.is_none() { - self.wheres.also(ColumnConstraint::HasType(col.clone(), typed_value_type)); + self.wheres.add_intersection(ColumnConstraint::HasType(col.clone(), typed_value_type)); } }, @@ -811,4 +811,4 @@ mod testing { assert_eq!(cc.empty_because.unwrap(), EmptyBecause::TypeMismatch(x.clone(), unit_type_set(ValueType::Ref), ValueType::Boolean)); } -} \ No newline at end of file +} diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index 87fe6e2e..c26d70f1 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -78,7 +78,7 @@ impl ConjoiningClauses { left: left, right: right, }; - self.wheres.also(constraint); + self.wheres.add_intersection(constraint); Ok(()) } } diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 457aa2ab..121e5f85 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -241,7 +241,7 @@ impl ColumnIntersection { self.0.is_empty() } - pub fn also(&mut self, constraint: ColumnConstraint) { + pub fn add_intersection(&mut self, constraint: ColumnConstraint) { self.0.push(ColumnConstraintOrAlternation::Constraint(constraint)); } } @@ -267,7 +267,7 @@ impl IntoIterator for ColumnAlternation { } impl ColumnAlternation { - pub fn instead(&mut self, intersection: ColumnIntersection) { + pub fn add_alternate(&mut self, intersection: ColumnIntersection) { self.0.push(intersection); } } From 2b2b5cf69652a6fd152a333650ab8c62b969d2f9 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 28 Mar 2017 16:17:25 -0700 Subject: [PATCH 11/11] Part 6: implement decision tree for processing simple alternation. 
--- query-algebrizer/src/clauses/or.rs | 311 ++++++++++++++++++++++++++++- 1 file changed, 307 insertions(+), 4 deletions(-) diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index d061d48d..d8e6b1fe 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -20,12 +20,16 @@ use mentat_core::{ use mentat_query::{ NonIntegerConstant, + OrJoin, + OrWhereClause, Pattern, PatternValuePlace, PatternNonValuePlace, PlainSymbol, Predicate, SrcVar, + UnifyVars, + WhereClause, }; use clauses::ConjoiningClauses; @@ -43,16 +47,12 @@ use types::{ DatomsTable, EmptyBecause, NumericComparison, - OrJoinKind, QualifiedAlias, QueryValue, SourceAlias, TableAlias, }; - - - /// Return true if both left and right are the same variable or both are non-variable. fn _simply_matches_place(left: &PatternNonValuePlace, right: &PatternNonValuePlace) -> bool { match (left, right) { @@ -78,3 +78,306 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla _ => true, } } + +pub enum DeconstructedOrJoin { + KnownSuccess, + KnownEmpty(EmptyBecause), + Unit(OrWhereClause), + UnitPattern(Pattern), + Simple(Vec), + Complex(OrJoin), +} + +/// Application of `or`. Note that this is recursive! +impl ConjoiningClauses { + fn apply_or_where_clause(&mut self, schema: &Schema, clause: OrWhereClause) -> Result<()> { + match clause { + OrWhereClause::Clause(clause) => self.apply_clause(schema, clause), + + // A query might be: + // [:find ?x :where (or (and [?x _ 5] [?x :foo/bar 7]))] + // which is equivalent to dropping the `or` _and_ the `and`! + OrWhereClause::And(clauses) => { + for clause in clauses { + self.apply_clause(schema, clause)?; + } + Ok(()) + }, + } + } + + fn apply_or_join(&mut self, schema: &Schema, mut or_join: OrJoin) -> Result<()> { + // Simple optimization. Empty `or` clauses disappear. Unit `or` clauses + // are equivalent to just the inner clause. 
+        match or_join.clauses.len() {
+            0 => Ok(()),
+            1 => self.apply_or_where_clause(schema, or_join.clauses.pop().unwrap()),
+            _ => self.apply_non_trivial_or_join(schema, or_join),
+        }
+    }
+
+    /// Find out if the `OrJoin` is simple. A simple `or` is one in
+    /// which:
+    /// - Every arm is a pattern, so that we can use a single table alias for all.
+    /// - Each pattern should run against the same table, for the same reason.
+    /// - Each pattern uses the same variables. (That's checked by validation.)
+    /// - Each pattern has the same shape, so we can extract bindings from the same columns
+    ///   regardless of which clause matched.
+    ///
+    /// Like this:
+    ///
+    /// ```edn
+    /// [:find ?x
+    ///  :where (or [?x :foo/knows "John"]
+    ///             [?x :foo/parent "Ámbar"]
+    ///             [?x :foo/knows "Daphne"])]
+    /// ```
+    ///
+    /// While we're doing this diagnosis, we'll also find out if:
+    /// - No patterns can match: the enclosing CC is known-empty.
+    /// - Some patterns can't match: they are discarded.
+    /// - Only one pattern can match: the `or` can be simplified away.
+    ///
+    /// The outcome is reported as a `DeconstructedOrJoin`:
+    /// - `Complex(or_join)`: the join is not fully unified; it is handed back untouched.
+    /// - `KnownSuccess`: the join has no clauses at all.
+    /// - `Unit(clause)`: the join has exactly one clause, which is handed back as-is.
+    /// - Anything else is decided by `_deconstruct_or_join`.
+    fn deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin {
+        // If we have explicit non-maximal unify-vars, we *can't* simply run this as a
+        // single pattern --
+        // ```
+        // [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])]
+        // ```
+        // is *not* equivalent to
+        // ```
+        // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]]
+        // ```
+        if !or_join.is_fully_unified() {
+            // It's complex because we need to make sure that non-unified vars
+            // mentioned in the body of the `or-join` do not unify with variables
+            // outside the `or-join`. We can't naïvely collect clauses into the
+            // same CC. TODO: pay attention to the unify list when generating
+            // constraints. Temporarily shadow variables within each `or` branch.
+            return DeconstructedOrJoin::Complex(or_join);
+        }
+
+        match or_join.clauses.len() {
+            0 => DeconstructedOrJoin::KnownSuccess,
+
+            // It's safe to simply 'leak' the entire clause, because we know every var in it is
+            // supposed to unify with the enclosing form.
+            1 => DeconstructedOrJoin::Unit(or_join.clauses.into_iter().next().unwrap()),
+            _ => self._deconstruct_or_join(schema, or_join),
+        }
+    }
+
+    /// This helper does the work of taking a known-non-trivial `or` or `or-join`,
+    /// walking the contained patterns to decide whether it can be translated simply
+    /// -- as a collection of constraints on a single table alias -- or if it needs to
+    /// be implemented as a `UNION`.
+    ///
+    /// See the description of `deconstruct_or_join` for more details. This method expects
+    /// to be called _only_ by `deconstruct_or_join`.
+    ///
+    /// Returns:
+    /// - `Complex`, carrying a rebuilt `OrJoin`, as soon as a clause is found that is not a
+    ///   plain pattern, that differs in shape from the first retained pattern, or that needs
+    ///   a different table from the one already chosen. Patterns that were dropped earlier
+    ///   because no table could be found for them are _not_ restored into the rebuilt join.
+    /// - `KnownEmpty(reason)` if every clause was a pattern for which no table could be
+    ///   found; `reason` is the failure reported for the last such pattern.
+    /// - `UnitPattern(pattern)` if exactly one pattern survived.
+    /// - `Simple(patterns)` if two or more patterns survived.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `or_join` is not fully unified or has fewer than two clauses.
+    fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin {
+        // Preconditions enforced by `deconstruct_or_join`.
+        // That caller only turns away joins that are not fully unified, so an explicit
+        // `or-join` whose unify-vars are maximal can arrive here as well. Assert the
+        // condition the caller actually checks, rather than insisting on
+        // `UnifyVars::Implicit`, which would panic for such a join.
+        assert!(or_join.is_fully_unified());
+        assert!(or_join.clauses.len() >= 2);
+
+        // We're going to collect into this.
+        // If at any point we hit something that's not a suitable pattern, we'll
+        // reconstruct and return a complex `OrJoin`.
+        let mut patterns: Vec<Pattern> = Vec::with_capacity(or_join.clauses.len());
+
+        // Keep track of the table we need every pattern to use.
+        let mut expected_table: Option<DatomsTable> = None;
+
+        // Technically we might have several reasons, but we take the last -- that is, that's the
+        // reason we don't have at least one pattern!
+        // We'll return this as our reason if no pattern can return results.
+        let mut empty_because: Option<EmptyBecause> = None;
+
+        // Walk each clause in turn, bailing as soon as we know this can't be simple.
+        // This is deliberately `while let` over an explicit iterator rather than a `for`
+        // loop: when we bail we chain the unconsumed remainder of `clauses` onto the
+        // reconstructed list, and a `for` loop would have taken ownership of the iterator.
+        let mut clauses = or_join.clauses.into_iter();
+        while let Some(clause) = clauses.next() {
+            // If we fail half-way through processing, we want to reconstitute the input.
+            // Keep a handle to the clause itself here to smooth over the moved `if let` below.
+            let last: OrWhereClause;
+
+            if let OrWhereClause::Clause(WhereClause::Pattern(p)) = clause {
+                // Compute the table for the pattern. If we can't figure one out, it means
+                // the pattern cannot succeed; we drop it.
+                // Inside an `or` it's not a failure for a pattern to be unable to match, which
+                // manifests as a table being unable to be found.
+                let table = self.table_for_places(schema, &p.attribute, &p.value);
+                match table {
+                    Err(e) => {
+                        empty_because = Some(e);
+
+                        // Do not accumulate this pattern at all. Add lightness!
+                        continue;
+                    },
+                    Ok(table) => {
+                        // Check the shape of the pattern against a previous pattern.
+                        // The first retained pattern acts as the template for all later ones.
+                        let same_shape =
+                            if let Some(template) = patterns.first() {
+                                template.source == p.source &&     // or-arms all use the same source anyway.
+                                _simply_matches_place(&template.entity, &p.entity) &&
+                                _simply_matches_place(&template.attribute, &p.attribute) &&
+                                _simply_matches_value_place(&template.value, &p.value) &&
+                                _simply_matches_place(&template.tx, &p.tx)
+                            } else {
+                                // No previous pattern.
+                                true
+                            };
+
+                        // All of our clauses that _do_ yield a table -- that are possible --
+                        // must use the same table in order for this to be a simple `or`!
+                        if same_shape {
+                            if expected_table == Some(table) {
+                                patterns.push(p);
+                                continue;
+                            }
+                            if expected_table.is_none() {
+                                // First pattern with a table: it fixes the table for the rest.
+                                expected_table = Some(table);
+                                patterns.push(p);
+                                continue;
+                            }
+                        }
+
+                        // Otherwise, we need to keep this pattern so we can reconstitute.
+                        // We'll fall through to reconstruction.
+                    }
+                }
+                last = OrWhereClause::Clause(WhereClause::Pattern(p));
+            } else {
+                last = clause;
+            }
+
+            // If we get here, it means one of our checks above failed. Reconstruct and bail.
+            let reconstructed: Vec<OrWhereClause> =
+                // Non-empty patterns already collected…
+                patterns.into_iter()
+                        .map(|p| OrWhereClause::Clause(WhereClause::Pattern(p)))
+                // … then the clause we just considered…
+                        .chain(::std::iter::once(last))
+                // … then the rest of the iterator.
+                        .chain(clauses)
+                        .collect();
+
+            // NOTE(review): the input was fully unified, so describing the rebuilt join as
+            // implicit should be equivalent even if it arrived as an explicit `or-join` --
+            // confirm against `OrJoin::is_fully_unified`.
+            return DeconstructedOrJoin::Complex(OrJoin {
+                unify_vars: UnifyVars::Implicit,
+                clauses: reconstructed,
+            });
+        }
+
+        // If we got here without returning, then `patterns` is what we're working with.
+        // If `patterns` is empty, it means _none_ of the clauses in the `or` could succeed.
+        match patterns.len() {
+            0 => {
+                // We had at least two clauses and never bailed, so every one of them was a
+                // pattern whose table lookup failed, and each failure recorded a reason.
+                let reason = empty_because
+                    .expect("every clause was dropped, so a reason must have been recorded");
+                DeconstructedOrJoin::KnownEmpty(reason)
+            },
+            1 => DeconstructedOrJoin::UnitPattern(patterns.pop().unwrap()),
+            _ => DeconstructedOrJoin::Simple(patterns),
+        }
+    }
+
+    /// Apply an `or_join` to this CC, choosing a strategy based on how the join
+    /// deconstructs (see `deconstruct_or_join`).
+    ///
+    /// Only call this with an `or_join` with 2 or more patterns.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `or_join` has fewer than two clauses. Also panics, via `unimplemented!()`,
+    /// if the join turns out to be complex: that case is not handled yet.
+    fn apply_non_trivial_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> {
+        assert!(or_join.clauses.len() >= 2);
+
+        match self.deconstruct_or_join(schema, or_join) {
+            DeconstructedOrJoin::KnownSuccess => {
+                // The pattern came to us empty -- `(or)`. Do nothing.
+                Ok(())
+            },
+            DeconstructedOrJoin::KnownEmpty(reason) => {
+                // There were no arms of the join that could be mapped to a table.
+                // The entire `or`, and thus the CC, cannot yield results.
+                self.mark_known_empty(reason);
+                Ok(())
+            },
+            DeconstructedOrJoin::Unit(clause) => {
+                // There was only one clause. We're unifying all variables, so we can just apply here.
+                self.apply_or_where_clause(schema, clause)
+            },
+            DeconstructedOrJoin::UnitPattern(pattern) => {
+                // Same, but simpler.
+                self.apply_pattern(schema, pattern);
+                Ok(())
+            },
+            DeconstructedOrJoin::Simple(patterns) => {
+                // Hooray! Fully unified and plain ol' patterns that all use the same table.
+                // Go right ahead and produce a set of constraint alternations that we can collect,
+                // using a single table alias.
+                // TODO
+                self.apply_simple_or_join(schema, patterns)
+            },
+            DeconstructedOrJoin::Complex(_) => {
+                // Do this the hard way. TODO
+                unimplemented!();
+            },
+        }
+    }
+
+
+    /// A simple `or` join is effectively a single pattern in which an individual column's bindings
+    /// are not a single value. Rather than a pattern like
+    ///
+    /// ```edn
+    /// [?x :foo/knows "John"]
+    /// ```
+    ///
+    /// we have
+    ///
+    /// ```edn
+    /// (or [?x :foo/knows "John"]
+    ///     [?x :foo/hates "Peter"])
+    /// ```
+    ///
+    /// but the generated SQL is very similar: the former is
+    ///
+    /// ```sql
+    /// WHERE datoms00.a = 99 AND datoms00.v = 'John'
+    /// ```
+    ///
+    /// with the latter growing to
+    ///
+    /// ```sql
+    /// WHERE (datoms00.a = 99 AND datoms00.v = 'John')
+    ///    OR (datoms00.a = 98 AND datoms00.v = 'Peter')
+    /// ```
+    ///
+    /// This is currently a stub: it checks its precondition and returns `Ok(())` without
+    /// adding any constraints to the CC. The comments in the body describe the intended
+    /// implementation.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `patterns` holds fewer than two patterns.
+    fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec<Pattern>) -> Result<()> {
+        assert!(patterns.len() >= 2);
+
+        // Each constant attribute might _expand_ the set of possible types of the value-place
+        // variable. We thus generate a set of possible types, and we intersect it with the
+        // types already possible in the CC. If the resultant set is empty, the pattern cannot match.
+        // If the final set isn't unit, we must project a type tag column.
+        // If one of the alternations requires a type that is impossible in the CC, then we can
+        // discard that alternate:
+        //
+        // ```edn
+        // [:find ?x
+        //  :where [?a :some/int ?x]
+        //         (or [_ :some/otherint ?x]
+        //             [_ :some/string ?x])]
+        // ```
+        //
+        // can simplify to
+        //
+        // ```edn
+        // [:find ?x
+        //  :where [?a :some/int ?x]
+        //         [_ :some/otherint ?x]]
+        // ```
+        //
+        // Similarly, if the value place is constant, it must be of a type that doesn't determine
+        // a different table for any of the patterns.
+        // TODO
+
+        // Begin by building a base CC that we'll use to produce constraints from each pattern.
+        // Populate this base CC with whatever variables are already known from the CC to which
+        // we're applying this `or`.
+        // This will give us any applicable type constraints or column mappings.
+        // Then generate a single table alias, based on the first pattern, and use that to make any
+        // new variable mappings we will need to extract values.
+        Ok(())
+    }
+}