diff --git a/query-algebrizer/src/cc.rs b/query-algebrizer/src/cc.rs index 4863f923..40cdcb6c 100644 --- a/query-algebrizer/src/cc.rs +++ b/query-algebrizer/src/cc.rs @@ -18,7 +18,11 @@ use std::fmt::{ Formatter, Result, }; -use std::collections::BTreeMap; +use std::collections::{ + BTreeMap, + BTreeSet, +}; + use std::collections::btree_map::Entry; use self::mentat_core::{ @@ -198,6 +202,7 @@ impl Debug for ColumnConstraint { pub struct ConjoiningClauses { /// `true` if this set of clauses cannot yield results in the context of the current schema. pub is_known_empty: bool, + pub empty_because: Option, /// A function used to generate an alias for a table -- e.g., from "datoms" to "datoms123". aliaser: TableAliaser, @@ -209,25 +214,82 @@ pub struct ConjoiningClauses { pub wheres: Vec, /// A map from var to qualified columns. Used to project. - pub bindings: BTreeMap>, + pub column_bindings: BTreeMap>, + + /// A list of variables mentioned in the enclosing query's :in clause. These must all be bound + /// before the query can be executed. TODO: clarify what this means for nested CCs. + pub input_variables: BTreeSet, + + /// In some situations -- e.g., when a query is being run only once -- we know in advance the + /// values bound to some or all variables. These can be substituted directly when the query is + /// algebrized. + /// + /// Value bindings must agree with `known_types`. If you write a query like + /// ```edn + /// [:find ?x :in $ ?val :where [?x :foo/int ?val]] + /// ``` + /// + /// and for `?val` provide `TypedValue::String("foo".to_string())`, the query will be known at + /// algebrizing time to be empty. + value_bindings: BTreeMap, /// A map from var to type. Whenever a var maps unambiguously to two different types, it cannot /// yield results, so we don't represent that case here. If a var isn't present in the map, it /// means that its type is not known in advance. pub known_types: BTreeMap, - /// A mapping, similar to `bindings`, but used to pull type tags out of the store at runtime. + /// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime. /// If a var isn't present in `known_types`, it should be present here. extracted_types: BTreeMap, } +pub enum EmptyBecause { + // Var, existing, desired. + TypeMismatch(Variable, ValueType, ValueType), + UnresolvedIdent(NamespacedKeyword), + InvalidAttributeIdent(NamespacedKeyword), + InvalidAttributeEntid(Entid), + ValueTypeMismatch(ValueType, TypedValue), + AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO +} + +impl Debug for EmptyBecause { + fn fmt(&self, f: &mut Formatter) -> Result { + use self::EmptyBecause::*; + match self { + &TypeMismatch(ref var, ref existing, ref desired) => { + write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", + var, desired, existing) + }, + &UnresolvedIdent(ref kw) => { + write!(f, "Couldn't resolve keyword {}", kw) + }, + &InvalidAttributeIdent(ref kw) => { + write!(f, "{} does not name an attribute", kw) + }, + &InvalidAttributeEntid(entid) => { + write!(f, "{} is not an attribute", entid) + }, + &ValueTypeMismatch(value_type, ref typed_value) => { + write!(f, "Type mismatch: {:?} doesn't match attribute type {:?}", + typed_value, value_type) + }, + &AttributeLookupFailed => { + write!(f, "Attribute lookup failed") + }, + } + } +} + impl Debug for ConjoiningClauses { fn fmt(&self, fmt: &mut Formatter) -> Result { fmt.debug_struct("ConjoiningClauses") .field("is_known_empty", &self.is_known_empty) .field("from", &self.from) .field("wheres", &self.wheres) - .field("bindings", &self.bindings) + .field("column_bindings", &self.column_bindings) + .field("input_variables", &self.input_variables) + .field("value_bindings", &self.value_bindings) .field("known_types", &self.known_types) .field("extracted_types", &self.extracted_types) .finish() @@ -239,10 +301,13 @@ impl Default for ConjoiningClauses { fn default() -> ConjoiningClauses { ConjoiningClauses { is_known_empty: false, + empty_because: None, aliaser: default_table_aliaser(), from: vec![], wheres: vec![], - bindings: BTreeMap::new(), + input_variables: BTreeSet::new(), + column_bindings: BTreeMap::new(), + value_bindings: BTreeMap::new(), known_types: BTreeMap::new(), extracted_types: BTreeMap::new(), } @@ -250,22 +315,53 @@ impl Default for ConjoiningClauses { } impl ConjoiningClauses { + fn with_value_bindings(bindings: BTreeMap) -> ConjoiningClauses { + let mut cc = ConjoiningClauses { + value_bindings: bindings, + ..Default::default() + }; + + // Pre-fill our type mappings with the types of the input bindings. + cc.known_types + .extend(cc.value_bindings.iter() + .map(|(k, v)| (k.clone(), v.value_type()))); + cc + } +} + +impl ConjoiningClauses { + fn bound_value(&self, var: &Variable) -> Option { + self.value_bindings.get(var).cloned() + } + pub fn bind_column_to_var(&mut self, table: TableAlias, column: DatomsColumn, var: Variable) { - let alias = QualifiedAlias(table, column); + // Do we have an external binding for this? + if let Some(bound_val) = self.bound_value(&var) { + // Great! Use that instead. + self.constrain_column_to_constant(table, column, bound_val); + return; + } + + // Will we have an external binding for this? + // If so, we don't need to extract its type. We'll know it later. + let late_binding = self.input_variables.contains(&var); // If this is a value, and we don't already know its type or where // to get its type, record that we can get it from this table. let needs_type_extraction = - alias.is_value() && - !self.known_types.contains_key(&var) && - !self.extracted_types.contains_key(&var); + !late_binding && // Never need to extract for bound vars. + column == DatomsColumn::Value && // Never need to extract types for refs. + !self.known_types.contains_key(&var) && // We know the type! + !self.extracted_types.contains_key(&var); // We're already extracting the type. + + let alias = QualifiedAlias(table, column); // If we subsequently find out its type, we'll remove this later -- see // the removal in `constrain_var_to_type`. if needs_type_extraction { self.extracted_types.insert(var.clone(), alias.for_type_tag()); } - self.bindings.entry(var).or_insert(vec![]).push(alias); + self.column_bindings.entry(var).or_insert(vec![]).push(alias); } pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { @@ -285,9 +381,9 @@ impl ConjoiningClauses { } /// Constrains the var if there's no existing type. - /// Returns `false` if it's impossible for this type to apply (because there's a conflicting - /// type already known). - fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) -> bool { + /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting + /// type already known. + fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { // If this variable now has a known attribute, we can unhook extracted types for // any other instances of that variable. // For example, given @@ -300,17 +396,18 @@ impl ConjoiningClauses { // the second pattern we can avoid that. self.extracted_types.remove(&variable); - // Is there an existing binding for this variable? - let types_entry = self.known_types.entry(variable); - match types_entry { + // Is there an existing mapping for this variable? + // Any known inputs have already been added to known_types, and so if they conflict we'll + // spot it here. + if let Some(existing) = self.known_types.get(&variable).cloned() { // If so, the types must match. - Entry::Occupied(entry) => - *entry.get() == this_type, - // If not, record the one we just determined. - Entry::Vacant(entry) => { - entry.insert(this_type); - true + if existing != this_type { + self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type)); } + } else { + // If not, record the one we just determined. + self.known_types.insert(variable, this_type); + } } @@ -331,15 +428,14 @@ impl ConjoiningClauses { // Ident or attribute resolution errors (the only other check we need to do) will be done // by the caller. if let &PatternNonValuePlace::Variable(ref v) = value { - if !self.constrain_var_to_type(v.clone(), ValueType::Ref) { - self.mark_known_empty("Couldn't constrain var to Ref."); - } + self.constrain_var_to_type(v.clone(), ValueType::Ref) } } - fn mark_known_empty(&mut self, why: &str) { + fn mark_known_empty(&mut self, why: EmptyBecause) { self.is_known_empty = true; - println!("{}", why); // TODO: proper logging. + println!("CC known empty: {:?}.", &why); // TODO: proper logging. + self.empty_because = Some(why); } fn entid_for_ident<'s, 'a>(&self, schema: &'s Schema, ident: &'a NamespacedKeyword) -> Option { @@ -433,7 +529,7 @@ impl ConjoiningClauses { /// Expansions. impl ConjoiningClauses { - /// Take the contents of `bindings` and generate inter-constraints for the appropriate + /// Take the contents of `column_bindings` and generate inter-constraints for the appropriate /// columns into `wheres`. /// /// For example, a bindings map associating a var to three places in the query, like @@ -448,8 +544,8 @@ impl ConjoiningClauses { /// datoms12.e = datoms13.v /// datoms12.e = datoms14.e /// ``` - pub fn expand_bindings(&mut self) { - for cols in self.bindings.values() { + pub fn expand_column_bindings(&mut self) { + for cols in self.column_bindings.values() { if cols.len() > 1 { let ref primary = cols[0]; let secondaries = cols.iter().skip(1); @@ -541,7 +637,7 @@ impl ConjoiningClauses { match pattern.entity { PatternNonValuePlace::Placeholder => - // Placeholders don't contribute any bindings, nor do + // Placeholders don't contribute any column bindings, nor do // they constrain the query -- there's no need to produce // IS NOT NULL, because we don't store nulls in our schema. (), @@ -554,7 +650,7 @@ impl ConjoiningClauses { self.constrain_column_to_entity(col.clone(), DatomsColumn::Entity, entid) } else { // A resolution failure means we're done here. - self.mark_known_empty("Entity ident didn't resolve."); + self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone())); return; } } @@ -569,7 +665,7 @@ impl ConjoiningClauses { if !schema.is_attribute(entid) { // Furthermore, that entid must resolve to an attribute. If it doesn't, this // query is meaningless. - self.mark_known_empty("Attribute entid isn't an attribute."); + self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(entid)); return; } self.constrain_attribute(col.clone(), entid) @@ -579,12 +675,12 @@ impl ConjoiningClauses { self.constrain_attribute(col.clone(), entid); if !schema.is_attribute(entid) { - self.mark_known_empty("Attribute ident isn't an attribute."); + self.mark_known_empty(EmptyBecause::InvalidAttributeIdent(ident.clone())); return; } } else { // A resolution failure means we're done here. - self.mark_known_empty("Attribute ident didn't resolve."); + self.mark_known_empty(EmptyBecause::UnresolvedIdent(ident.clone())); return; } } @@ -605,9 +701,8 @@ impl ConjoiningClauses { if let Some(this_type) = value_type { // Wouldn't it be nice if we didn't need to clone in the found case? // It doesn't matter too much: collisons won't be too frequent. - if !self.constrain_var_to_type(v.clone(), this_type) { - // The types don't match. This pattern cannot succeed. - self.mark_known_empty("Value types don't match."); + self.constrain_var_to_type(v.clone(), this_type); + if self.is_known_empty { return; } } @@ -647,7 +742,7 @@ impl ConjoiningClauses { } else { // A resolution failure means we're done here: this attribute must have an // entity value. - self.mark_known_empty("Value ident didn't resolve."); + self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.clone())); return; } } else { @@ -661,7 +756,11 @@ impl ConjoiningClauses { let typed_value = c.clone().into_typed_value(); if !typed_value.is_congruent_with(value_type) { // If the attribute and its value don't match, the pattern must fail. - self.mark_known_empty("Value constant not congruent with attribute type."); + // We can never have a congruence failure if `value_type` is `None`, so we + // forcibly unwrap here. + let value_type = value_type.expect("Congruence failure but couldn't unwrap"); + let why = EmptyBecause::ValueTypeMismatch(value_type, typed_value); + self.mark_known_empty(why); return; } @@ -707,7 +806,8 @@ impl ConjoiningClauses { // We didn't determine a table, likely because there was a mismatch // between an attribute and a value. // We know we cannot return a result, so we short-circuit here. - self.mark_known_empty("Table aliaser couldn't determine a table."); + self.mark_known_empty(EmptyBecause::AttributeLookupFailed); + return; } } } @@ -793,7 +893,7 @@ mod testing { assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref); // ?x is bound to datoms0.e. - assert_eq!(cc.bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); // Our 'where' clauses are two: // - datoms0.a = 99 @@ -831,7 +931,7 @@ mod testing { assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref); // ?x is bound to datoms0.e. - assert_eq!(cc.bindings.get(&x).unwrap(), &vec![d0_e.clone()]); + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]); // Our 'where' clauses are two: // - datoms0.v = true @@ -877,8 +977,8 @@ mod testing { tx: PatternNonValuePlace::Placeholder, }); - // Finally, expand bindings to get the overlaps for ?x. - cc.expand_bindings(); + // Finally, expand column bindings to get the overlaps for ?x. + cc.expand_column_bindings(); println!("{:#?}", cc); @@ -898,7 +998,7 @@ mod testing { assert_eq!(cc.known_types.get(&x).unwrap(), &ValueType::Ref); // ?x is bound to datoms0.e and datoms1.e. - assert_eq!(cc.bindings.get(&x).unwrap(), + assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![ d0_e.clone(), d1_e.clone(), @@ -917,4 +1017,75 @@ mod testing { ]); } + #[test] + fn test_value_bindings() { + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + + let b: BTreeMap = + vec![(y.clone(), TypedValue::Boolean(true))].into_iter().collect(); + let mut cc = ConjoiningClauses::with_value_bindings(b); + + cc.apply_pattern(&schema, &Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + + // ?y has been expanded into `true`. + assert_eq!(cc.wheres, vec![ + ColumnConstraint::EqualsEntity(d0_a, 99), + ColumnConstraint::EqualsValue(d0_v, TypedValue::Boolean(true)), + ]); + + // There is no binding for ?y. + assert!(!cc.column_bindings.contains_key(&y)); + + // ?x is bound to the entity. + assert_eq!(cc.column_bindings.get(&x).unwrap(), + &vec![d0_e.clone()]); + } + + #[test] + fn test_value_bindings_type_disagreement() { + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); + add_attribute(&mut schema, 99, Attribute { + value_type: ValueType::Boolean, + ..Default::default() + }); + + let x = Variable(PlainSymbol::new("?x")); + let y = Variable(PlainSymbol::new("?y")); + + let b: BTreeMap = + vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); + let mut cc = ConjoiningClauses::with_value_bindings(b); + + cc.apply_pattern(&schema, &Pattern { + source: None, + entity: PatternNonValuePlace::Variable(x.clone()), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(y.clone()), + tx: PatternNonValuePlace::Placeholder, + }); + + // The type of the provided binding doesn't match the type of the attribute. + assert!(cc.is_known_empty); + } } diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index 5ebbfe1a..fa0b4e3b 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -212,7 +212,7 @@ fn project_elements<'a, I: IntoIterator>( // one column in the query. If that constraint is violated it's a // bug in our code, so it's appropriate to panic here. let columns = query.cc - .bindings + .column_bindings .get(var) .expect("Every variable has a binding");