diff --git a/core/Cargo.toml b/core/Cargo.toml index a4f93198..64424b03 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -4,9 +4,10 @@ version = "0.0.1" workspace = ".." [dependencies] +enum-set = { git = "https://github.com/rnewman/enum-set" } +lazy_static = "0.2.2" num = "0.1.35" ordered-float = "0.4.0" -lazy_static = "0.2.2" [dependencies.edn] path = "../edn" diff --git a/core/src/lib.rs b/core/src/lib.rs index 943a30d2..67b4d183 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -8,6 +8,8 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +extern crate enum_set; + #[macro_use] extern crate lazy_static; extern crate ordered_float; @@ -19,6 +21,9 @@ pub mod values; use std::collections::BTreeMap; use std::fmt; use std::rc::Rc; + +use enum_set::EnumSet; + use self::ordered_float::OrderedFloat; use self::edn::NamespacedKeyword; @@ -33,7 +38,8 @@ pub type Entid = i64; /// The attribute of each Mentat assertion has a :db/valueType constraining the value to a /// particular set. Mentat recognizes the following :db/valueType values. -#[derive(Clone,Copy,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)] +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] +#[repr(u32)] pub enum ValueType { Ref, Boolean, @@ -44,6 +50,32 @@ pub enum ValueType { Keyword, } +impl ValueType { + pub fn all_enums() -> EnumSet { + // TODO: lazy_static. + let mut s = EnumSet::new(); + s.insert(ValueType::Ref); + s.insert(ValueType::Boolean); + s.insert(ValueType::Instant); + s.insert(ValueType::Long); + s.insert(ValueType::Double); + s.insert(ValueType::String); + s.insert(ValueType::Keyword); + s + } +} + + +impl enum_set::CLike for ValueType { + fn to_u32(&self) -> u32 { + *self as u32 + } + + unsafe fn from_u32(v: u32) -> ValueType { + std::mem::transmute(v) + } +} + impl ValueType { pub fn to_edn_value(self) -> edn::Value { match self { diff --git a/query-algebrizer/Cargo.toml b/query-algebrizer/Cargo.toml index c21bc49b..bfa9ed84 100644 --- a/query-algebrizer/Cargo.toml +++ b/query-algebrizer/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] +enum-set = { git = "https://github.com/rnewman/enum-set" } error-chain = "0.8.1" [dependencies.mentat_core] diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index ca054d89..c980fd97 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -13,7 +13,6 @@ use std::cmp; use std::collections::{ BTreeMap, BTreeSet, - HashSet, }; use std::collections::btree_map::Entry; @@ -59,6 +58,7 @@ use types::{ QueryValue, SourceAlias, TableAlias, + ValueTypeSet, }; mod inputs; @@ -82,12 +82,6 @@ impl RcCloned for ::std::rc::Rc { } } -fn unit_type_set(t: ValueType) -> HashSet { - let mut s = HashSet::with_capacity(1); - s.insert(t); - s -} - trait Contains { fn when_contains T>(&self, k: &K, f: F) -> Option; } @@ -201,10 +195,11 @@ pub struct ConjoiningClauses { /// A map from var to type. Whenever a var maps unambiguously to two different types, it cannot /// yield results, so we don't represent that case here. If a var isn't present in the map, it /// means that its type is not known in advance. - pub known_types: BTreeMap>, + /// Usually that state should be represented by `ValueTypeSet::Any`. + pub known_types: BTreeMap, /// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime. - /// If a var isn't present in `known_types`, it should be present here. + /// If a var isn't unit in `known_types`, it should be present here. pub extracted_types: BTreeMap, } @@ -278,7 +273,7 @@ impl ConjoiningClauses { // Pre-fill our type mappings with the types of the input bindings. cc.known_types .extend(types.iter() - .map(|(k, v)| (k.clone(), unit_type_set(*v)))); + .map(|(k, v)| (k.clone(), ValueTypeSet::of_one(*v)))); cc }, } @@ -330,7 +325,7 @@ impl ConjoiningClauses { /// or integer" isn't good enough. pub fn known_type(&self, var: &Variable) -> Option { match self.known_types.get(var) { - Some(types) if types.len() == 1 => types.iter().next().cloned(), + Some(set) if set.is_unit() => set.exemplar(), _ => None, } } @@ -443,16 +438,12 @@ impl ConjoiningClauses { /// Mark the given value as a long. pub fn constrain_var_to_long(&mut self, variable: Variable) { - self.narrow_types_for_var(variable, unit_type_set(ValueType::Long)); + self.narrow_types_for_var(variable, ValueTypeSet::of_one(ValueType::Long)); } /// Mark the given value as one of the set of numeric types. fn constrain_var_to_numeric(&mut self, variable: Variable) { - let mut numeric_types = HashSet::with_capacity(2); - numeric_types.insert(ValueType::Double); - numeric_types.insert(ValueType::Long); - - self.narrow_types_for_var(variable, numeric_types); + self.narrow_types_for_var(variable, ValueTypeSet::of_numeric_types()); } /// Constrains the var if there's no existing type. @@ -462,9 +453,9 @@ impl ConjoiningClauses { // Is there an existing mapping for this variable? // Any known inputs have already been added to known_types, and so if they conflict we'll // spot it here. - if let Some(existing) = self.known_types.insert(variable.clone(), unit_type_set(this_type)) { + if let Some(existing) = self.known_types.insert(variable.clone(), ValueTypeSet::of_one(this_type)) { // There was an existing mapping. Does this type match? - if !existing.contains(&this_type) { + if !existing.contains(this_type) { self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type)); } } @@ -477,21 +468,28 @@ impl ConjoiningClauses { /// actually move from a state in which a variable can have no type to one that can /// yield results! We never do so at present -- we carefully set-union types before we /// set-intersect them -- but this is worth bearing in mind. - pub fn broaden_types(&mut self, additional_types: BTreeMap>) { + pub fn broaden_types(&mut self, additional_types: BTreeMap) { for (var, new_types) in additional_types { match self.known_types.entry(var) { Entry::Vacant(e) => { - if new_types.len() == 1 { + if new_types.is_unit() { self.extracted_types.remove(e.key()); } e.insert(new_types); }, Entry::Occupied(mut e) => { - if e.get().is_empty() && self.empty_because.is_some() { - panic!("Uh oh: we failed this pattern, probably because {:?} couldn't match, but now we're broadening its type.", - e.get()); + let new; + // Scoped borrow of `e`. + { + let existing_types = e.get(); + if existing_types.is_empty() && // The set is empty: no types are possible. + self.empty_because.is_some() { + panic!("Uh oh: we failed this pattern, probably because {:?} couldn't match, but now we're broadening its type.", + e.key()); + } + new = existing_types.union(&new_types); } - e.get_mut().extend(new_types.into_iter()); + e.insert(new); }, } } @@ -501,7 +499,7 @@ impl ConjoiningClauses { /// If no types are already known -- `var` could have any type -- then this is equivalent to /// simply setting the known types to `types`. /// If the known types don't intersect with `types`, mark the pattern as known-empty. - fn narrow_types_for_var(&mut self, var: Variable, types: HashSet) { + fn narrow_types_for_var(&mut self, var: Variable, types: ValueTypeSet) { if types.is_empty() { // We hope this never occurs; we should catch this case earlier. self.mark_known_empty(EmptyBecause::NoValidTypes(var)); @@ -515,10 +513,9 @@ impl ConjoiningClauses { e.insert(types); }, Entry::Occupied(mut e) => { - // TODO: we shouldn't need to clone here. - let intersected: HashSet<_> = types.intersection(e.get()).cloned().collect(); + let intersected: ValueTypeSet = types.intersection(e.get()); if intersected.is_empty() { - let mismatching_type = types.iter().next().unwrap().clone(); + let mismatching_type = types.exemplar().expect("types isn't none"); let reason = EmptyBecause::TypeMismatch(e.key().clone(), e.get().clone(), mismatching_type); @@ -536,7 +533,7 @@ impl ConjoiningClauses { /// Restrict the sets of types for the provided vars to the provided types. /// See `narrow_types_for_var`. - pub fn narrow_types(&mut self, additional_types: BTreeMap>) { + pub fn narrow_types(&mut self, additional_types: BTreeMap) { if additional_types.is_empty() { return; } @@ -623,7 +620,7 @@ impl ConjoiningClauses { &PatternValuePlace::Variable(ref v) => { // Do we know that this variable can't be a string? If so, we don't need // AllDatoms. None or String means it could be or definitely is. - match self.known_types.get(v).map(|types| types.contains(&ValueType::String)) { + match self.known_types.get(v).map(|types| types.contains(ValueType::String)) { Some(false) => DatomsTable::Datoms, _ => DatomsTable::AllDatoms, } @@ -769,7 +766,7 @@ impl ConjoiningClauses { return; } for (var, types) in self.known_types.iter() { - if types.len() == 1 { + if types.is_unit() { self.extracted_types.remove(var); } } diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index d14dd9e1..971ff010 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -9,7 +9,10 @@ // specific language governing permissions and limitations under the License. use std::collections::btree_map::Entry; -use std::collections::BTreeSet; +use std::collections::{ + BTreeMap, + BTreeSet, +}; use mentat_core::{ Schema, @@ -41,6 +44,7 @@ use types::{ EmptyBecause, QualifiedAlias, SourceAlias, + ValueTypeSet, VariableColumn, }; @@ -654,6 +658,21 @@ impl ConjoiningClauses { type_associations = type_needed.iter().cloned().collect(); } + // Collect the new type information from the arms. There's some redundant work here -- + // they already have all of the information from the parent. + // Note that we start with the first clause's type information. + { + let mut clauses = acc.iter(); + let mut additional_types = clauses.next() + .expect("there to be at least one clause") + .known_types + .clone(); + for cc in clauses { + union_types(&mut additional_types, &cc.known_types); + } + self.broaden_types(additional_types); + } + let union = ComputedTable::Union { projection: projection, type_extraction: type_needed, @@ -674,6 +693,43 @@ impl ConjoiningClauses { } } +/// Helper to fold together a set of type maps. +fn union_types(into: &mut BTreeMap, + additional_types: &BTreeMap) { + // We want the exclusive disjunction -- any variable not mentioned in both sets -- to default + // to ValueTypeSet::Any. + // This is necessary because we lazily populate known_types, so sometimes the type set will + // be missing a `ValueTypeSet::Any` for a variable, and we want to broaden rather than + // accidentally taking the other side's word for it! + // The alternative would be to exhaustively pre-fill `known_types` with all mentioned variables + // in the whole query, which is daunting. + let mut any: BTreeMap; + // Scoped borrow of `into`. + { + let i: BTreeSet<&Variable> = into.keys().collect(); + let a: BTreeSet<&Variable> = additional_types.keys().collect(); + any = i.symmetric_difference(&a) + .map(|v| ((*v).clone(), ValueTypeSet::any())) + .collect(); + } + + // Collect the additional types. + for (var, new_types) in additional_types { + match into.entry(var.clone()) { + Entry::Vacant(e) => { + e.insert(new_types.clone()); + }, + Entry::Occupied(mut e) => { + let new = e.get().union(&new_types); + e.insert(new); + }, + } + } + + // Blat in those that are disjoint. + into.append(&mut any); +} + #[cfg(test)] mod testing { extern crate mentat_query_parser; diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index 21e98138..65bcfd01 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -293,7 +293,6 @@ mod testing { add_attribute, associate_ident, ident, - unit_type_set, }; use types::{ @@ -303,6 +302,7 @@ mod testing { QualifiedAlias, QueryValue, SourceAlias, + ValueTypeSet, }; use algebrize; @@ -801,7 +801,7 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), unit_type_set(ValueType::String), ValueType::Boolean)); + EmptyBecause::TypeMismatch(y.clone(), ValueTypeSet::of_one(ValueType::String), ValueType::Boolean)); } #[test] @@ -839,7 +839,7 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(x.clone(), unit_type_set(ValueType::Ref), ValueType::Boolean)); + EmptyBecause::TypeMismatch(x.clone(), ValueTypeSet::of_one(ValueType::Ref), ValueType::Boolean)); } #[test] @@ -854,8 +854,8 @@ mod testing { let e = Variable::from_valid_name("?e"); let v = Variable::from_valid_name("?v"); let cc = alg(&schema, query); - assert_eq!(cc.known_types.get(&e), Some(&unit_type_set(ValueType::Ref))); - assert_eq!(cc.known_types.get(&v), Some(&unit_type_set(ValueType::Boolean))); + assert_eq!(cc.known_types.get(&e), Some(&ValueTypeSet::of_one(ValueType::Ref))); + assert_eq!(cc.known_types.get(&v), Some(&ValueTypeSet::of_one(ValueType::Boolean))); assert!(!cc.extracted_types.contains_key(&e)); assert!(!cc.extracted_types.contains_key(&v)); } diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index 031d20be..fca10e9a 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -87,7 +87,6 @@ impl ConjoiningClauses { mod testing { use super::*; - use std::collections::HashSet; use mentat_core::attribute::Unique; use mentat_core::{ Attribute, @@ -115,6 +114,7 @@ mod testing { ColumnConstraint, EmptyBecause, QueryValue, + ValueTypeSet, }; #[test] @@ -159,7 +159,7 @@ mod testing { // After processing those two clauses, we know that ?y must be numeric, but not exactly // which type it must be. assert_eq!(None, cc.known_type(&y)); // Not just one. - let expected: HashSet = vec![ValueType::Double, ValueType::Long].into_iter().collect(); + let expected = ValueTypeSet::of_numeric_types(); assert_eq!(Some(&expected), cc.known_types.get(&y)); let clauses = cc.wheres; @@ -225,8 +225,7 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), EmptyBecause::TypeMismatch(y.clone(), - vec![ValueType::Double, ValueType::Long].into_iter() - .collect(), + ValueTypeSet::of_numeric_types(), ValueType::String)); } } \ No newline at end of file diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 4315a6dc..a890d065 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -8,6 +8,8 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +extern crate enum_set; + #[macro_use] extern crate error_chain; diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 8652d793..a9988a67 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -9,14 +9,17 @@ // specific language governing permissions and limitations under the License. use std::collections::BTreeSet; -use std::collections::HashSet; - use std::fmt::{ Debug, Formatter, Result, }; +use enum_set::{ + CLike, + EnumSet, +}; + use mentat_core::{ Entid, TypedValue, @@ -411,7 +414,7 @@ impl Debug for ColumnConstraint { #[derive(PartialEq, Clone)] pub enum EmptyBecause { // Var, existing, desired. - TypeMismatch(Variable, HashSet, ValueType), + TypeMismatch(Variable, ValueTypeSet, ValueType), NoValidTypes(Variable), NonNumericArgument, NonStringFulltextValue, @@ -461,4 +464,89 @@ impl Debug for EmptyBecause { }, } } +} + +trait EnumSetExtensions { + /// Return a set containing both `x` and `y`. + fn of_both(x: T, y: T) -> EnumSet; + + /// Return a clone of `self` with `y` added. + fn with(&self, y: T) -> EnumSet; +} + +impl EnumSetExtensions for EnumSet { + /// Return a set containing both `x` and `y`. + fn of_both(x: T, y: T) -> Self { + let mut o = EnumSet::new(); + o.insert(x); + o.insert(y); + o + } + + /// Return a clone of `self` with `y` added. + fn with(&self, y: T) -> EnumSet { + let mut o = self.clone(); + o.insert(y); + o + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ValueTypeSet(pub EnumSet); + +impl Default for ValueTypeSet { + fn default() -> ValueTypeSet { + ValueTypeSet::any() + } +} + +impl ValueTypeSet { + pub fn any() -> ValueTypeSet { + ValueTypeSet(ValueType::all_enums()) + } + + pub fn none() -> ValueTypeSet { + ValueTypeSet(EnumSet::new()) + } + + /// Return a set containing only `t`. + pub fn of_one(t: ValueType) -> ValueTypeSet { + let mut s = EnumSet::new(); + s.insert(t); + ValueTypeSet(s) + } + + /// Return a set containing `Double` and `Long`. + pub fn of_numeric_types() -> ValueTypeSet { + ValueTypeSet(EnumSet::of_both(ValueType::Double, ValueType::Long)) + } +} + +impl ValueTypeSet { + /// Returns a set containing all the types in this set and `other`. + pub fn union(&self, other: &ValueTypeSet) -> ValueTypeSet { + ValueTypeSet(self.0.union(other.0)) + } + + pub fn intersection(&self, other: &ValueTypeSet) -> ValueTypeSet { + ValueTypeSet(self.0.intersection(other.0)) + } + + /// Return an arbitrary type that's part of this set. + /// For a set containing a single type, this will be that type. + pub fn exemplar(&self) -> Option { + self.0.iter().next() + } + + pub fn contains(&self, vt: ValueType) -> bool { + self.0.contains(&vt) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn is_unit(&self) -> bool { + self.0.len() == 1 + } } \ No newline at end of file diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 9d533b69..fef09d2a 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -466,3 +466,34 @@ fn test_order_by() { ORDER BY `?y_value_type_tag` ASC, `?y` ASC, `?x` ASC"); assert_eq!(args, vec![]); } + +#[test] +fn test_complex_nested_or_join_type_projection() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + add_attribute(&mut schema, 98, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + + let input = r#"[:find [?y] + :where + (or + (or + [_ :page/title ?y]) + (or + [_ :page/title ?y]))]"#; + + let SQLQuery { sql, args } = translate(&schema, input); + assert_eq!(sql, "SELECT `c00`.`?y` AS `?y` \ + FROM (SELECT `datoms00`.v AS `?y` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 98 \ + UNION \ + SELECT `datoms01`.v AS `?y` \ + FROM `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 98) \ + AS `c00` \ + LIMIT 1"); + assert_eq!(args, vec![]); +} \ No newline at end of file