From 79fa0994b3ad9d0d9ab6417eaf91e97cb0b25e25 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 26 Apr 2017 15:50:17 -0700 Subject: [PATCH] Part 3: Handle `ground`. (#469) r=nalexander,rnewman This version removes nalexander's lovely matrix code. It turned out that scalar and tuple bindings are sufficiently different from coll and rel -- they can directly apply as values in the query -- that there was no point in jumping through hoops to turn those single values into a matrix. Furthermore, I've standardized us on a Vec representation for rectangular matrices, which should be much more efficient, but would have required rewriting that code. Finally, coll and rel are sufficiently different from each other -- coll doesn't require processing nested collections -- that my attempts to share code between them fell somewhat flat. I had lots of nice ideas about zipping together cycles and such, but ultimately I ended up with relatively straightforward, if a bit repetitive, code. The next commit will demonstrate the value of this work -- tests that exercised scalar and tuple grounding now collapse down to the simplest possible SQL. 
--- query-algebrizer/Cargo.toml | 1 + query-algebrizer/src/clauses/mod.rs | 20 +- query-algebrizer/src/clauses/or.rs | 1 + query-algebrizer/src/clauses/pattern.rs | 12 +- query-algebrizer/src/clauses/predicate.rs | 10 +- query-algebrizer/src/clauses/where_fn.rs | 560 ++++++++++++++++++++++ query-algebrizer/src/errors.rs | 29 +- query-algebrizer/src/lib.rs | 1 + query-algebrizer/src/types.rs | 16 +- query-algebrizer/tests/ground.rs | 315 ++++++++++++ query-translator/src/translate.rs | 9 +- query-translator/tests/translate.rs | 174 +++++++ query/src/lib.rs | 86 +++- 13 files changed, 1205 insertions(+), 29 deletions(-) create mode 100644 query-algebrizer/src/clauses/where_fn.rs create mode 100644 query-algebrizer/tests/ground.rs diff --git a/query-algebrizer/Cargo.toml b/query-algebrizer/Cargo.toml index 38a30fa5..b1dcf062 100644 --- a/query-algebrizer/Cargo.toml +++ b/query-algebrizer/Cargo.toml @@ -18,4 +18,5 @@ path = "../query" path = "../query-parser" [dev-dependencies] +itertools = "0.5" maplit = "0.1" diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index eabe925a..e94713da 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -67,6 +67,7 @@ mod not; mod pattern; mod predicate; mod resolve; +mod where_fn; use validate::{ validate_not_join, @@ -141,6 +142,7 @@ impl Intersection for BTreeMap { /// /// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`. /// - Predicate clauses turn into the same, but with other functions. +/// - Function clauses turn into `WHERE` parts using function-specific comparisons. /// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to /// bind it to the outer variables, or adds simple `WHERE` clauses to the outer /// clause. 
@@ -228,6 +230,7 @@ impl Debug for ConjoiningClauses { fmt.debug_struct("ConjoiningClauses") .field("empty_because", &self.empty_because) .field("from", &self.from) + .field("computed_tables", &self.computed_tables) .field("wheres", &self.wheres) .field("column_bindings", &self.column_bindings) .field("input_variables", &self.input_variables) @@ -479,14 +482,15 @@ impl ConjoiningClauses { /// Constrains the var if there's no existing type. /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting /// type already known. - fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { + fn constrain_var_to_type(&mut self, var: Variable, this_type: ValueType) { // Is there an existing mapping for this variable? // Any known inputs have already been added to known_types, and so if they conflict we'll // spot it here. - if let Some(existing) = self.known_types.insert(variable.clone(), ValueTypeSet::of_one(this_type)) { + let this_type_set = ValueTypeSet::of_one(this_type); + if let Some(existing) = self.known_types.insert(var.clone(), this_type_set) { // There was an existing mapping. Does this type match? if !existing.contains(this_type) { - self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type)); + self.mark_known_empty(EmptyBecause::TypeMismatch { var, existing, desired: this_type_set }); } } } @@ -545,10 +549,9 @@ impl ConjoiningClauses { Entry::Occupied(mut e) => { let intersected: ValueTypeSet = types.intersection(e.get()); if intersected.is_empty() { - let mismatching_type = types.exemplar().expect("types isn't none"); - let reason = EmptyBecause::TypeMismatch(e.key().clone(), - e.get().clone(), - mismatching_type); + let reason = EmptyBecause::TypeMismatch { var: e.key().clone(), + existing: e.get().clone(), + desired: types }; empty_because = Some(reason); } // Always insert, even if it's empty! 
@@ -838,6 +841,9 @@ impl ConjoiningClauses { WhereClause::Pred(p) => { self.apply_predicate(schema, p) }, + WhereClause::WhereFn(f) => { + self.apply_where_fn(schema, f) + }, WhereClause::OrJoin(o) => { validate_or_join(&o)?; self.apply_or_join(schema, o) diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index d4dfe2d9..26ecf48a 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -813,6 +813,7 @@ mod testing { }); schema } + /// Test that if all the attributes in an `or` fail to resolve, the entire thing fails. #[test] fn test_schema_based_failure() { diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index f9c084a3..7874ebdf 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -801,7 +801,11 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), ValueTypeSet::of_one(ValueType::String), ValueType::Boolean)); + EmptyBecause::TypeMismatch { + var: y.clone(), + existing: ValueTypeSet::of_one(ValueType::String), + desired: ValueTypeSet::of_one(ValueType::Boolean), + }); } #[test] @@ -839,7 +843,11 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(x.clone(), ValueTypeSet::of_one(ValueType::Ref), ValueType::Boolean)); + EmptyBecause::TypeMismatch { + var: x.clone(), + existing: ValueTypeSet::of_one(ValueType::Ref), + desired: ValueTypeSet::of_one(ValueType::Boolean), + }); } #[test] diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index e0c56b54..16b12f8f 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -222,8 +222,10 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), - 
ValueTypeSet::of_numeric_types(), - ValueType::String)); + EmptyBecause::TypeMismatch { + var: y.clone(), + existing: ValueTypeSet::of_numeric_types(), + desired: ValueTypeSet::of_one(ValueType::String), + }); } -} \ No newline at end of file +} diff --git a/query-algebrizer/src/clauses/where_fn.rs b/query-algebrizer/src/clauses/where_fn.rs new file mode 100644 index 00000000..ce348657 --- /dev/null +++ b/query-algebrizer/src/clauses/where_fn.rs @@ -0,0 +1,560 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::rc::Rc; + +use mentat_core::{ + Schema, + SQLValueType, + TypedValue, + ValueType, +}; + +use mentat_query::{ + Binding, + FnArg, + NonIntegerConstant, + Variable, + VariableOrPlaceholder, + WhereFn, +}; + +use clauses::{ + ConjoiningClauses, + PushComputed, +}; + +use errors::{ + BindingError, + ErrorKind, + Result, +}; + +use super::QualifiedAlias; + +use types::{ + ComputedTable, + EmptyBecause, + SourceAlias, + ValueTypeSet, + VariableColumn, +}; + +macro_rules! coerce_to_typed_value { + ($var: ident, $val: ident, $types: expr, $type: path, $constructor: path) => { { + Ok(if !$types.contains($type) { + Impossible(EmptyBecause::TypeMismatch { + var: $var.clone(), + existing: $types, + desired: ValueTypeSet::of_one($type), + }) + } else { + Val($constructor($val).into()) + }) + } } +} + +enum ValueConversion { + Val(TypedValue), + Impossible(EmptyBecause), +} + +/// Conversion of FnArgs to TypedValues. 
+impl ConjoiningClauses { + /// Convert the provided `FnArg` to a `TypedValue`. + /// The conversion depends on, and can fail because of: + /// - Existing known types of a variable to which this arg will be bound. + /// - Existing bindings of a variable `FnArg`. + fn typed_value_from_arg<'s>(&self, schema: &'s Schema, var: &Variable, arg: FnArg, known_types: ValueTypeSet) -> Result { + use self::ValueConversion::*; + if known_types.is_empty() { + // If this happens, it likely means the pattern has already failed! + return Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::any(), + })); + } + + match arg { + // Longs are potentially ambiguous: they might be longs or entids. + FnArg::EntidOrInteger(x) => { + match (ValueType::Ref.accommodates_integer(x), + known_types.contains(ValueType::Ref), + known_types.contains(ValueType::Long)) { + (true, true, true) => { + // Ambiguous: this arg could be an entid or a long. + // We default to long. + Ok(Val(TypedValue::Long(x))) + }, + (true, true, false) => { + // This can only be a ref. + Ok(Val(TypedValue::Ref(x))) + }, + (_, false, true) => { + // This can only be a long. + Ok(Val(TypedValue::Long(x))) + }, + (false, true, _) => { + // This isn't a valid ref, but that's the type to which this must conform! + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_longs(), + })) + }, + (_, false, false) => { + // Non-overlapping type sets. + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_longs(), + })) + }, + } + }, + + // If you definitely want to look up an ident, do it before running the query. + FnArg::IdentOrKeyword(x) => { + match (known_types.contains(ValueType::Ref), + known_types.contains(ValueType::Keyword)) { + (true, true) => { + // Ambiguous: this could be a keyword or an ident. + // Default to keyword. 
+ Ok(Val(TypedValue::Keyword(Rc::new(x)))) + }, + (true, false) => { + // This can only be an ident. Look it up! + match schema.get_entid(&x).map(TypedValue::Ref) { + Some(e) => Ok(Val(e)), + None => Ok(Impossible(EmptyBecause::UnresolvedIdent(x.clone()))), + } + }, + (false, true) => { + Ok(Val(TypedValue::Keyword(Rc::new(x)))) + }, + (false, false) => { + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_keywords(), + })) + }, + } + }, + + FnArg::Variable(in_var) => { + // TODO: technically you could ground an existing variable inside the query…. + if !self.input_variables.contains(&in_var) { + bail!(ErrorKind::UnboundVariable((*in_var.0).clone())); + } + match self.bound_value(&in_var) { + // The type is already known if it's a bound variable…. + Some(ref in_value) => Ok(Val(in_value.clone())), + None => bail!(ErrorKind::UnboundVariable((*in_var.0).clone())), + } + }, + + // This isn't implemented yet. + FnArg::Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), + + // These don't make sense here. + FnArg::Vector(_) | + FnArg::SrcVar(_) => bail!(ErrorKind::InvalidGroundConstant), + + // These are all straightforward. + FnArg::Constant(NonIntegerConstant::Boolean(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Boolean, TypedValue::Boolean) + }, + FnArg::Constant(NonIntegerConstant::Instant(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Instant, TypedValue::Instant) + }, + FnArg::Constant(NonIntegerConstant::Uuid(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Uuid, TypedValue::Uuid) + }, + FnArg::Constant(NonIntegerConstant::Float(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Double, TypedValue::Double) + }, + FnArg::Constant(NonIntegerConstant::Text(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::String, TypedValue::String) + }, + } + } +} + +/// Application of `where` functions. 
+impl ConjoiningClauses { + /// There are several kinds of functions binding variables in our Datalog: + /// - A set of functions like `ground`, `fulltext` and `get-else` that are translated into SQL + /// `VALUES`, `MATCH`, or `JOIN`, yielding bindings. + /// - In the future, some functions that are implemented via function calls in SQLite. + /// + /// At present we have implemented only a limited selection of functions. + pub fn apply_where_fn<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + // Because we'll be growing the set of built-in functions, handling each differently, and + // ultimately allowing user-specified functions, we match on the function name first. + match where_fn.operator.0.as_str() { + "ground" => self.apply_ground(schema, where_fn), + _ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())), + } + } + + fn apply_ground_place<'s>(&mut self, schema: &'s Schema, var: VariableOrPlaceholder, arg: FnArg) -> Result<()> { + match var { + VariableOrPlaceholder::Placeholder => Ok(()), + VariableOrPlaceholder::Variable(var) => self.apply_ground_var(schema, var, arg), + } + } + + /// Constrain the CC to associate the given var with the given ground argument. + /// Marks known-empty on failure. + fn apply_ground_var<'s>(&mut self, schema: &'s Schema, var: Variable, arg: FnArg) -> Result<()> { + let known_types = self.known_type_set(&var); + match self.typed_value_from_arg(schema, &var, arg, known_types)? { + ValueConversion::Val(value) => self.apply_ground_value(var, value), + ValueConversion::Impossible(because) => { + self.mark_known_empty(because); + Ok(()) + }, + } + } + + /// Marks known-empty on failure. 
+ fn apply_ground_value(&mut self, var: Variable, value: TypedValue) -> Result<()> { + if let Some(existing) = self.bound_value(&var) { + if existing != value { + self.mark_known_empty(EmptyBecause::ConflictingBindings { + var: var.clone(), + existing: existing.clone(), + desired: value, + }); + return Ok(()) + } + } else { + self.bind_value(&var, value.clone()); + } + + // Check to see whether this variable is already associated to a column. + // If so, we want to add an equality filter (or, in the future, redo the existing patterns). + if let Some(QualifiedAlias(table, column)) = self.column_bindings + .get(&var) + .and_then(|vec| vec.get(0).cloned()) { + self.constrain_column_to_constant(table, column, value); + } + + Ok(()) + } + + /// Take a relation: a matrix of values which will successively bind to named variables of + /// the provided types. + /// Construct a computed table to yield this relation. + /// This function will panic if some invariants are not met. + fn collect_named_bindings<'s>(&mut self, schema: &'s Schema, names: Vec, types: Vec, values: Vec) { + if values.is_empty() { + return; + } + + assert!(!names.is_empty()); + assert_eq!(names.len(), types.len()); + assert!(values.len() >= names.len()); + assert_eq!(values.len() % names.len(), 0); // It's an exact multiple. + + let named_values = ComputedTable::NamedValues { + names: names.clone(), + values: values, + }; + + let table = self.computed_tables.push_computed(named_values); + let alias = self.next_alias_for_table(table); + + // Stitch the computed table into column_bindings, so we get cross-linking. 
+ for (name, ty) in names.iter().zip(types.into_iter()) { + self.constrain_var_to_type(name.clone(), ty); + self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(name.clone()), name.clone()); + } + + self.from.push(SourceAlias(table, alias)); + } + + pub fn apply_ground<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + if where_fn.args.len() != 1 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 1)); + } + + let mut args = where_fn.args.into_iter(); + + if where_fn.binding.is_empty() { + // The binding must introduce at least one bound variable. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::NoBoundVariable)); + } + + if !where_fn.binding.is_valid() { + // The binding must not duplicate bound variables. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable)); + } + + // Scalar and tuple bindings are a little special: because there's only one value, + // we can immediately substitute the value as a known value in the CC, additionally + // generating a WHERE clause if columns have already been bound. + match (where_fn.binding, args.next().unwrap()) { + (Binding::BindScalar(var), constant) => + self.apply_ground_var(schema, var, constant), + + (Binding::BindTuple(places), FnArg::Vector(children)) => { + // Just the same, but we bind more than one column at a time. + if children.len() != places.len() { + // Number of arguments don't match the number of values. TODO: better error message. + bail!(ErrorKind::GroundBindingsMismatch); + } + for (place, arg) in places.into_iter().zip(children.into_iter()) { + self.apply_ground_place(schema, place, arg)? // TODO: short-circuit on impossible. + } + Ok(()) + }, + + // Collection bindings and rel bindings are similar in that they are both + // implemented as a subquery with a projection list and a set of values. 
+ // The difference is that BindColl has only a single variable, and its values + // are all in a single structure. That makes it substantially simpler! + (Binding::BindColl(var), FnArg::Vector(children)) => { + if children.is_empty() { + bail!(ErrorKind::InvalidGroundConstant); + } + + // Turn a collection of arguments into a Vec of `TypedValue`s of the same type. + let known_types = self.known_type_set(&var); + // Check that every value has the same type. + let mut accumulated_types = ValueTypeSet::none(); + let mut skip: Option = None; + let values = children.into_iter() + .filter_map(|arg| -> Option> { + // We need to get conversion errors out. + // We also want to mark known-empty on impossibility, but + // still detect serious errors. + match self.typed_value_from_arg(schema, &var, arg, known_types) { + Ok(ValueConversion::Val(tv)) => { + if accumulated_types.insert(tv.value_type()) && + !accumulated_types.is_unit() { + // Values not all of the same type. + Some(Err(ErrorKind::InvalidGroundConstant.into())) + } else { + Some(Ok(tv)) + } + }, + Ok(ValueConversion::Impossible(because)) => { + // Skip this value. + skip = Some(because); + None + }, + Err(e) => Some(Err(e.into())), + } + }) + .collect::>>()?; + + if values.is_empty() { + let because = skip.expect("we skipped all rows for a reason"); + self.mark_known_empty(because); + return Ok(()); + } + + // Otherwise, we now have the values and the type. + let types = vec![accumulated_types.exemplar().unwrap()]; + let names = vec![var.clone()]; + + self.collect_named_bindings(schema, names, types, values); + Ok(()) + }, + + (Binding::BindRel(places), FnArg::Vector(rows)) => { + if rows.is_empty() { + bail!(ErrorKind::InvalidGroundConstant); + } + + // Grab the known types to which these args must conform, and track + // the places that won't be bound in the output. 
+ let template: Vec> = + places.iter() + .map(|x| match x { + &VariableOrPlaceholder::Placeholder => None, + &VariableOrPlaceholder::Variable(ref v) => Some((v.clone(), self.known_type_set(v))), + }) + .collect(); + + // The expected 'width' of the matrix is the number of named variables. + let full_width = places.len(); + let names: Vec = places.into_iter().filter_map(|x| x.into_var()).collect(); + let expected_width = names.len(); + let expected_rows = rows.len(); + + if expected_width == 0 { + // They can't all be placeholders. + bail!(ErrorKind::InvalidGroundConstant); + } + + // Accumulate values into `matrix` and types into `a_t_f_c`. + // This representation of a rectangular matrix is more efficient than one composed + // of N separate vectors. + let mut matrix = Vec::with_capacity(expected_width * expected_rows); + let mut accumulated_types_for_columns = vec![ValueTypeSet::none(); expected_width]; + + // Loop so we can bail out. + let mut skipped_all: Option = None; + for row in rows.into_iter() { + match row { + FnArg::Vector(cols) => { + // Make sure that every row is the same length. + if cols.len() != full_width { + bail!(ErrorKind::InvalidGroundConstant); + } + + // TODO: don't accumulate twice. + let mut vals = Vec::with_capacity(expected_width); + let mut skip: Option = None; + for (col, pair) in cols.into_iter().zip(template.iter()) { + // Now we have (val, Option<(name, known_types)>). Silly, + // but this is how we iter! + // Convert each item in the row. + // If any value in the row is impossible, then skip the row. + // If all rows are impossible, fail the entire CC. + if let &Some(ref pair) = pair { + match self.typed_value_from_arg(schema, &pair.0, col, pair.1)? { + ValueConversion::Val(tv) => vals.push(tv), + ValueConversion::Impossible(because) => { + // Skip this row. It cannot produce bindings. + skip = Some(because); + break; + }, + } + } + } + + if skip.is_some() { + // Skip this row and record why, in case we skip all. 
+ skipped_all = skip; + continue; + } + + // Accumulate the values into the matrix and the types into the type set. + for (val, acc) in vals.into_iter().zip(accumulated_types_for_columns.iter_mut()) { + let inserted = acc.insert(val.value_type()); + if inserted && !acc.is_unit() { + // Heterogeneous types. + bail!(ErrorKind::InvalidGroundConstant); + } + matrix.push(val); + } + + }, + _ => bail!(ErrorKind::InvalidGroundConstant), + } + } + + // Do we have rows? If not, the CC cannot succeed. + if matrix.is_empty() { + // We will either have bailed or will have accumulated *something* into the matrix, + // so we can safely unwrap here. + self.mark_known_empty(skipped_all.expect("we skipped for a reason")); + return Ok(()); + } + + // Take the single type from each set. We know there's only one: we got at least one + // type, 'cos we bailed out for zero rows, and we also bailed out each time we + // inserted a second type. + // By restricting to homogeneous columns, we greatly simplify projection. In the + // future, we could loosen this restriction, at the cost of projecting (some) value + // type tags. If and when we want to algebrize in two phases and allow for + // late-binding input variables, we'll probably be able to loosen this restriction + // with little penalty. 
+ let types = accumulated_types_for_columns.into_iter() + .map(|x| x.exemplar().unwrap()) + .collect(); + self.collect_named_bindings(schema, names, types, matrix); + Ok(()) + }, + (_, _) => bail!(ErrorKind::InvalidGroundConstant), + } + } +} + +#[cfg(test)] +mod testing { + use super::*; + + use mentat_core::{ + Attribute, + ValueType, + }; + + use mentat_query::{ + Binding, + FnArg, + NamespacedKeyword, + PlainSymbol, + Variable, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + use types::{ + ValueTypeSet, + }; + + #[test] + fn test_apply_ground() { + let vz = Variable::from_valid_name("?z"); + + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); + + // It's awkward enough to write these expansions that we give the details for the simplest + // case only. See the tests of the translator for more extensive (albeit looser) coverage. + let op = PlainSymbol::new("ground"); + cc.apply_ground(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::EntidOrInteger(10), + ], + binding: Binding::BindScalar(vz.clone()), + }).expect("to be able to apply_ground"); + + assert!(!cc.is_known_empty()); + + // Finally, expand column bindings. + cc.expand_column_bindings(); + assert!(!cc.is_known_empty()); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 0); + + let column_bindings = cc.column_bindings; + assert_eq!(column_bindings.len(), 0); // Scalar doesn't need this. 
+ + let known_types = cc.known_types; + assert_eq!(known_types.len(), 1); + assert_eq!(known_types.get(&vz).expect("to know the type of ?z"), + &ValueTypeSet::of_one(ValueType::Long)); + + let value_bindings = cc.value_bindings; + assert_eq!(value_bindings.len(), 1); + assert_eq!(value_bindings.get(&vz).expect("to have a value for ?z"), + &TypedValue::Long(10)); // We default to Long instead of entid. + } +} diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index 0c93011d..852e07fc 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -10,12 +10,20 @@ extern crate mentat_query; -use mentat_core::ValueType; +use mentat_core::{ + ValueType, +}; use self::mentat_query::{ PlainSymbol, }; +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum BindingError { + NoBoundVariable, + RepeatedBoundVariable, // TODO: include repeated variable(s). +} + error_chain! { types { Error, ErrorKind, ResultExt, Result; @@ -32,9 +40,9 @@ error_chain! { display("no function named {}", name) } - InvalidNumberOfArguments(name: PlainSymbol, number: usize, expected: usize) { + InvalidNumberOfArguments(function: PlainSymbol, number: usize, expected: usize) { description("invalid number of arguments") - display("invalid number of arguments to {}: expected {}, got {}.", name, expected, number) + display("invalid number of arguments to {}: expected {}, got {}.", function, expected, number) } UnboundVariable(name: PlainSymbol) { @@ -42,6 +50,21 @@ error_chain! { display("unbound variable: {}", name) } + InvalidBinding(function: PlainSymbol, binding_error: BindingError) { + description("invalid binding") + display("invalid binding for {}: {:?}.", function, binding_error) + } + + GroundBindingsMismatch { + description("mismatched bindings in ground") + display("mismatched bindings in ground") + } + + InvalidGroundConstant { + // TODO: flesh this out. 
+ description("invalid expression in ground constant") + display("invalid expression in ground constant") + } InvalidArgument(function: PlainSymbol, expected_type: &'static str, position: usize) { description("invalid argument") diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 9af4f0c2..9bf9b8d1 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -46,6 +46,7 @@ use mentat_query::{ }; pub use errors::{ + BindingError, Error, ErrorKind, Result, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index f4761780..ab7222bb 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -53,6 +53,10 @@ pub enum ComputedTable { type_extraction: BTreeSet, arms: Vec<::clauses::ConjoiningClauses>, }, + NamedValues { + names: Vec, + values: Vec, + }, } impl DatomsTable { @@ -419,8 +423,8 @@ impl Debug for ColumnConstraint { #[derive(PartialEq, Clone)] pub enum EmptyBecause { - // Var, existing, desired. - TypeMismatch(Variable, ValueTypeSet, ValueType), + ConflictingBindings { var: Variable, existing: TypedValue, desired: TypedValue }, + TypeMismatch { var: Variable, existing: ValueTypeSet, desired: ValueTypeSet }, NoValidTypes(Variable), NonNumericArgument, NonStringFulltextValue, @@ -436,7 +440,11 @@ impl Debug for EmptyBecause { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { use self::EmptyBecause::*; match self { - &TypeMismatch(ref var, ref existing, ref desired) => { + &ConflictingBindings { ref var, ref existing, ref desired } => { + write!(f, "Var {:?} can't be {:?} because it's already bound to {:?}", + var, desired, existing) + }, + &TypeMismatch { ref var, ref existing, ref desired } => { write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", var, desired, existing) }, @@ -573,4 +581,4 @@ impl ValueTypeSet { pub fn is_unit(&self) -> bool { self.0.len() == 1 } -} \ No newline at end of file +} diff --git a/query-algebrizer/tests/ground.rs 
b/query-algebrizer/tests/ground.rs new file mode 100644 index 00000000..0a2bdf69 --- /dev/null +++ b/query-algebrizer/tests/ground.rs @@ -0,0 +1,315 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate mentat_core; +extern crate mentat_query; +extern crate mentat_query_algebrizer; +extern crate mentat_query_parser; + +use mentat_core::{ + Attribute, + Entid, + Schema, + ValueType, + TypedValue, +}; + +use mentat_query_parser::{ + parse_find_string, +}; + +use mentat_query::{ + NamespacedKeyword, + PlainSymbol, + Variable, +}; + +use mentat_query_algebrizer::{ + BindingError, + ConjoiningClauses, + ComputedTable, + Error, + ErrorKind, + algebrize, +}; + +// These are helpers that tests use to build Schema instances. 
+#[cfg(test)] +fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { + schema.entid_map.insert(e, i.clone()); + schema.ident_map.insert(i.clone(), e); +} + +#[cfg(test)] +fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { + schema.schema_map.insert(e, a); +} + +fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "knows"), 66); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, 65, Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 66, Attribute { + value_type: ValueType::Ref, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 67, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 68, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 69, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema +} + +fn bails(schema: &Schema, input: &str) -> Error { + let parsed = parse_find_string(input).expect("query input to have parsed"); + algebrize(schema.into(), parsed).expect_err("algebrize to have failed") +} + +fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("query input to have parsed"); + algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc +} + +#[test] +fn test_ground_doesnt_bail_for_type_conflicts() { + // We know `?x` to be a ref, but we're attempting to ground it to a Double. + // The query can return no results. 
+ let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground 9.95) ?x]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_tuple_fails_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [5 9.95]) [?x ?p]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_scalar_fails_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground true) ?p]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_coll_skips_impossible() { + // We know `?x` to be a ref, so the Double in the collection cannot bind to it. + // That value is skipped; the remaining values are still bound. + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [5 9.95 11]) [?x ...]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x")], + values: vec![TypedValue::Ref(5), TypedValue::Ref(11)], + }); +} + +#[test] +fn test_ground_coll_fails_if_all_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [5.1 5.2]) [?p ...]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_rel_skips_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[8 "foo"] [5 7] [9.95 9] [11 12]]) [[?x ?p]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x"), Variable::from_valid_name("?p")], + values: vec![TypedValue::Ref(5), TypedValue::Ref(7), TypedValue::Ref(11), TypedValue::Ref(12)], + }); +} + +#[test] +fn 
test_ground_rel_fails_if_all_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[11 5.1] [12 5.2]]) [[?x ?p]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_tuple_rejects_all_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [8 "foo" 3]) [_ _ _]]]"#; + let schema = prepopulated_schema(); + bails(&schema, &q); +} + +#[test] +fn test_ground_rel_rejects_all_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[8 "foo"]]) [[_ _]]]]"#; + let schema = prepopulated_schema(); + bails(&schema, &q); +} + +#[test] +fn test_ground_tuple_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [8 "foo" 3]) [?x _ ?p]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?x")), Some(TypedValue::Ref(8))); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?p")), Some(TypedValue::Ref(3))); +} + +#[test] +fn test_ground_rel_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[8 "foo" 3] [5 false 7] [5 9.95 9]]) [[?x _ ?p]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x"), Variable::from_valid_name("?p")], + values: vec![ + TypedValue::Ref(8), + TypedValue::Ref(3), + TypedValue::Ref(5), + TypedValue::Ref(7), + TypedValue::Ref(5), + TypedValue::Ref(9), + ], + }); +} + +// Nothing to do with ground, but while we're here… +#[test] +fn test_multiple_reference_type_failure() { + let q = r#"[:find ?x :where [?x :foo/age ?y] [?x :foo/knows ?y]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_tuple_infers_types() 
{ + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [8 10]) [?x ?v]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?x")), Some(TypedValue::Ref(8))); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?v")), Some(TypedValue::Long(10))); +} + +#[test] +fn test_ground_rel_infers_types() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?v]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x"), Variable::from_valid_name("?v")], + values: vec![TypedValue::Ref(8), TypedValue::Long(10)], + }); +} + +#[test] +fn test_ground_coll_heterogeneous_types() { + let q = r#"[:find ?x :where [?x _ ?v] [(ground [false 8.5]) [?v ...]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidGroundConstant, _) => { + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_rel_heterogeneous_types() { + let q = r#"[:find ?x :where [?x _ ?v] [(ground [[false] [5]]) [[?v]]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidGroundConstant, _) => { + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_tuple_duplicate_vars() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [8 10]) [?x ?x]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidBinding(v, e), _) => { + assert_eq!(v, PlainSymbol::new("ground")); + assert_eq!(e, BindingError::RepeatedBoundVariable); + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_rel_duplicate_vars() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?x]]]]"#; + let schema = prepopulated_schema(); + let e = 
bails(&schema, &q); + match e { + Error(ErrorKind::InvalidBinding(v, e), _) => { + assert_eq!(v, PlainSymbol::new("ground")); + assert_eq!(e, BindingError::RepeatedBoundVariable); + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_nonexistent_variable_invalid() { + let q = r#"[:find ?x ?e :where [?e _ ?x] (not [(ground 17) ?v])]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::UnboundVariable(PlainSymbol(v)), _) => { + assert_eq!(v, "?v".to_string()); + }, + _ => { + panic!(); + }, + } +} diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index f755af65..0928aa90 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -52,6 +52,7 @@ use mentat_query_sql::{ SelectQuery, TableList, TableOrSubquery, + Values, }; trait ToConstraint { @@ -240,7 +241,13 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq }, ComputedTable::Subquery(subquery) => { TableOrSubquery::Subquery(Box::new(cc_to_exists(subquery))) - } + }, + ComputedTable::NamedValues { + names, values, + } => { + // We assume column homogeneity, so we won't have any type tag columns. + TableOrSubquery::Values(Values::Named(names, values), alias) + }, } } diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 5c28c56b..a9c42274 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -550,3 +550,177 @@ fn test_complex_nested_or_join_type_projection() { LIMIT 1"); assert_eq!(args, vec![]); } + +#[test] +fn test_ground_scalar() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x . :where [(ground "yyy") ?x]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT $v0 AS `?x` LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "yyy")]); + + // Verify that we accept bound input constants. 
+ let query = r#"[:find ?x . :in ?v :where [(ground ?v) ?x]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?v"), TypedValue::String(Rc::new("aaa".into())))]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + assert_eq!(sql, "SELECT $v0 AS `?x` LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "aaa"),]); +} + +#[test] +fn test_ground_tuple() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x ?y :where [(ground [1 "yyy"]) [?x ?y]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT 1 AS `?x`, $v0 AS `?y`"); + assert_eq!(args, vec![make_arg("$v0", "yyy")]); + + // Verify that we accept bound input constants. + let query = r#"[:find [?x ?y] :in ?u ?v :where [(ground [?u ?v]) [?x ?y]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?u"), TypedValue::Long(2)), + (Variable::from_valid_name("?v"), TypedValue::String(Rc::new("aaa".into()))),]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + // TODO: treat 2 as an input variable that could be bound late, rather than eagerly binding it. + assert_eq!(sql, "SELECT 2 AS `?x`, $v0 AS `?y` LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "aaa"),]); +} + +#[test] +fn test_ground_coll() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x :where [(ground ["xxx" "yyy"]) [?x ...]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x` FROM \ + (SELECT 0 AS `?x` WHERE 0 UNION ALL VALUES ($v0), ($v1)) AS `c00`"); + assert_eq!(args, vec![make_arg("$v0", "xxx"), + make_arg("$v1", "yyy")]); + + // Verify that we accept bound input constants. 
+ let query = r#"[:find ?x :in ?u ?v :where [(ground [?u ?v]) [?x ...]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?u"), TypedValue::Long(2)), + (Variable::from_valid_name("?v"), TypedValue::Long(3)),]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + // TODO: treat 2 and 3 as input variables that could be bound late, rather than eagerly binding. + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x` FROM \ + (SELECT 0 AS `?x` WHERE 0 UNION ALL VALUES (2), (3)) AS `c00`"); + assert_eq!(args, vec![]); +} + +#[test] +fn test_ground_rel() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x ?y :where [(ground [[1 "xxx"] [2 "yyy"]]) [[?x ?y]]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x`, `c00`.`?y` AS `?y` FROM \ + (SELECT 0 AS `?x`, 0 AS `?y` WHERE 0 UNION ALL VALUES (1, $v0), (2, $v1)) AS `c00`"); + assert_eq!(args, vec![make_arg("$v0", "xxx"), + make_arg("$v1", "yyy")]); + + // Verify that we accept bound input constants. + let query = r#"[:find ?x ?y :in ?u ?v :where [(ground [[?u 1] [?v 2]]) [[?x ?y]]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?u"), TypedValue::Long(3)), + (Variable::from_valid_name("?v"), TypedValue::Long(4)),]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + // TODO: treat 3 and 4 as input variables that could be bound late, rather than eagerly binding. + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x`, `c00`.`?y` AS `?y` FROM \ + (SELECT 0 AS `?x`, 0 AS `?y` WHERE 0 UNION ALL VALUES (3, 1), (4, 2)) AS `c00`"); + assert_eq!(args, vec![]); +} + +#[test] +fn test_compound_with_ground() { + let schema = prepopulated_schema(); + + // Verify that we can use the resulting CCs as children in compound CCs. 
+ let query = r#"[:find ?x :where (or [(ground "yyy") ?x] + [(ground "zzz") ?x])]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + // This is confusing because the computed tables (like `c00`) are numbered sequentially in each + // arm of the `or` rather than numbered globally. But SQLite scopes the names correctly, so it + // works. In the future, we might number the computed tables globally to make this more clear. + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x` FROM (\ + SELECT $v0 AS `?x` UNION \ + SELECT $v1 AS `?x`) AS `c00`"); + assert_eq!(args, vec![make_arg("$v0", "yyy"), + make_arg("$v1", "zzz"),]); + + // Verify that we can use ground to constrain the bindings produced by earlier clauses. + let query = r#"[:find ?x . :where [_ :foo/bar ?x] [(ground "yyy") ?x]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT $v0 AS `?x` FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + + assert_eq!(args, vec![make_arg("$v0", "yyy")]); + + // Verify that we can further constrain the bindings produced by our clause. + let query = r#"[:find ?x . :where [(ground "yyy") ?x] [_ :foo/bar ?x]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT $v0 AS `?x` FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + + assert_eq!(args, vec![make_arg("$v0", "yyy")]); +} + +#[test] +fn test_unbound_attribute_with_ground_entity() { + let query = r#"[:find ?x ?v :where [?x _ ?v] (not [(ground 17) ?x])]"#; + let schema = prepopulated_schema(); + let SQLQuery { sql, .. 
} = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, \ + `all_datoms00`.v AS `?v`, \ + `all_datoms00`.value_type_tag AS `?v_value_type_tag` \ + FROM `all_datoms` AS `all_datoms00` \ + WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.e = 17)"); +} + +#[test] +fn test_unbound_attribute_with_ground() { + // TODO: this needs to expand the type code. #475. + let query = r#"[:find ?x :where [?x _ ?v] (not [(ground 5) ?v])]"#; + let schema = prepopulated_schema(); + let SQLQuery { sql, .. } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` \ + WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 5)"); +} + + +#[test] +fn test_not_with_ground() { + let mut schema = prepopulated_schema(); + associate_ident(&mut schema, NamespacedKeyword::new("db", "valueType"), 7); + associate_ident(&mut schema, NamespacedKeyword::new("db.type", "ref"), 23); + associate_ident(&mut schema, NamespacedKeyword::new("db.type", "bool"), 28); + associate_ident(&mut schema, NamespacedKeyword::new("db.type", "instant"), 29); + add_attribute(&mut schema, 7, Attribute { + value_type: ValueType::Ref, + multival: false, + ..Default::default() + }); + + // Scalar. + // TODO: this kind of simple `not` should be implemented without the subquery. #476. + let query = r#"[:find ?x :where [?x :db/valueType ?v] (not [(ground :db.type/instant) ?v])]"#; + let SQLQuery { sql, .. } = translate(&schema, query); + assert_eq!(sql, + "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 7 AND NOT \ + EXISTS (SELECT 1 WHERE `datoms00`.v = 29)"); + + // Coll. + // TODO: we can generate better SQL for this, too. #476. + let query = r#"[:find ?x :where [?x :db/valueType ?v] (not [(ground [:db.type/bool :db.type/instant]) [?v ...]])]"#; + let SQLQuery { sql, .. 
} = translate(&schema, query); + assert_eq!(sql, + "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 7 AND NOT EXISTS \ + (SELECT 1 FROM (SELECT 0 AS `?v` WHERE 0 UNION ALL VALUES (28), (29)) AS `c00` \ + WHERE `datoms00`.v = `c00`.`?v`)"); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index abd35868..c412baac 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -35,6 +35,7 @@ extern crate mentat_core; use std::collections::{ BTreeSet, + HashSet, }; use std::fmt; @@ -59,7 +60,7 @@ use mentat_core::{ pub type SrcVarName = String; // Do not include the required syntactic '$'. -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Variable(pub Rc); impl Variable { @@ -536,21 +537,90 @@ impl FindSpec { // Datomic accepts variable or placeholder. DataScript accepts recursive bindings. Mentat sticks // to the non-recursive form Datomic accepts, which is much simpler to process. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum VariableOrPlaceholder { Placeholder, Variable(Variable), } +impl VariableOrPlaceholder { + pub fn into_var(self) -> Option { + match self { + VariableOrPlaceholder::Placeholder => None, + VariableOrPlaceholder::Variable(var) => Some(var), + } + } + + pub fn var(&self) -> Option<&Variable> { + match self { + &VariableOrPlaceholder::Placeholder => None, + &VariableOrPlaceholder::Variable(ref var) => Some(var), + } + } +} + #[derive(Clone,Debug,Eq,PartialEq)] pub enum Binding { - BindRel(Vec), - - BindColl(Variable), - - BindTuple(Vec), - BindScalar(Variable), + BindColl(Variable), + BindRel(Vec), + BindTuple(Vec), +} + +impl Binding { + /// Return each variable or `None`, in order. 
+ pub fn variables(&self) -> Vec<Option<Variable>> { + match self { + &Binding::BindScalar(ref var) | &Binding::BindColl(ref var) => vec![Some(var.clone())], + &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => vars.iter().map(|x| x.var().cloned()).collect(), + } + } + + /// Return `true` if no variables are bound, i.e., all binding entries are placeholders. + pub fn is_empty(&self) -> bool { + match self { + &Binding::BindScalar(_) | &Binding::BindColl(_) => false, + &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => vars.iter().all(|x| x.var().is_none()), + } + } + + /// Return `true` if at least one variable is bound and none is bound twice. Scalar and + /// coll bindings always qualify; an all-placeholder tuple or rel binding does not. + /// + /// ``` + /// extern crate mentat_query; + /// use std::rc::Rc; + /// + /// let v = mentat_query::Variable::from_valid_name("?foo"); + /// let vv = mentat_query::VariableOrPlaceholder::Variable(v); + /// let p = mentat_query::VariableOrPlaceholder::Placeholder; + /// + /// let e = mentat_query::Binding::BindTuple(vec![p.clone()]); + /// let b = mentat_query::Binding::BindTuple(vec![p.clone(), vv.clone()]); + /// let d = mentat_query::Binding::BindTuple(vec![vv.clone(), p, vv]); + /// assert!(b.is_valid()); // One var, one placeholder: OK. + /// assert!(!e.is_valid()); // Only placeholders: not OK. + /// assert!(!d.is_valid()); // Duplicate var: not OK. + /// ``` + pub fn is_valid(&self) -> bool { + match self { + &Binding::BindScalar(_) | &Binding::BindColl(_) => true, + &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => { + let mut acc = HashSet::<Variable>::new(); + for var in vars { + if let &VariableOrPlaceholder::Variable(ref var) = var { + if !acc.insert(var.clone()) { + // It's invalid if there was an equal var already present in the set -- + // i.e., we have a duplicate var. + return false; + } + } + } + // We're not valid if every place is a placeholder! + !acc.is_empty() + } + } + } } // Note that the "implicit blank" rule applies.