diff --git a/parser-utils/src/lib.rs b/parser-utils/src/lib.rs index 5e8cf923..c41f820b 100644 --- a/parser-utils/src/lib.rs +++ b/parser-utils/src/lib.rs @@ -38,8 +38,20 @@ pub type ResultParser = Expected ParseResult>>; #[macro_export] macro_rules! assert_parses_to { ( $parser: expr, $input: expr, $expected: expr ) => {{ - let mut par = $parser(); - let result = par.parse($input.with_spans().into_atom_stream()).map(|x| x.0); // TODO: check remainder of stream. + let par = $parser(); + let result = par.skip(eof()).parse($input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, Ok($expected)); + }} +} + +/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function +/// against string input and expecting a certain result. +#[macro_export] +macro_rules! assert_edn_parses_to { + ( $parser: expr, $input: expr, $expected: expr ) => {{ + let par = $parser(); + let input = edn::parse::value($input).expect("to be able to parse input as EDN"); + let result = par.skip(eof()).parse(input.into_atom_stream()).map(|x| x.0); assert_eq!(result, Ok($expected)); }} } diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index c980fd97..917947eb 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -63,11 +63,15 @@ use types::{ mod inputs; mod or; +mod not; mod pattern; mod predicate; mod resolve; -use validate::validate_or_join; +use validate::{ + validate_not_join, + validate_or_join, +}; pub use self::inputs::QueryInputs; @@ -203,6 +207,22 @@ pub struct ConjoiningClauses { pub extracted_types: BTreeMap, } +impl PartialEq for ConjoiningClauses { + fn eq(&self, other: &ConjoiningClauses) -> bool { + self.empty_because.eq(&other.empty_because) && + self.from.eq(&other.from) && + self.computed_tables.eq(&other.computed_tables) && + self.wheres.eq(&other.wheres) && + self.column_bindings.eq(&other.column_bindings) && + 
self.input_variables.eq(&other.input_variables) && + self.value_bindings.eq(&other.value_bindings) && + self.known_types.eq(&other.known_types) && + self.extracted_types.eq(&other.extracted_types) + } +} + +impl Eq for ConjoiningClauses {} + impl Debug for ConjoiningClauses { fn fmt(&self, fmt: &mut Formatter) -> ::std::fmt::Result { fmt.debug_struct("ConjoiningClauses") @@ -812,6 +832,10 @@ impl ConjoiningClauses { validate_or_join(&o)?; self.apply_or_join(schema, o) }, + WhereClause::NotJoin(n) => { + validate_not_join(&n)?; + self.apply_not_join(schema, n) + }, _ => unimplemented!(), } } diff --git a/query-algebrizer/src/clauses/not.rs b/query-algebrizer/src/clauses/not.rs new file mode 100644 index 00000000..45723c51 --- /dev/null +++ b/query-algebrizer/src/clauses/not.rs @@ -0,0 +1,545 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +use mentat_core::Schema; + +use mentat_query::{ + ContainsVariables, + NotJoin, + UnifyVars, +}; + +use clauses::ConjoiningClauses; + +use errors::{ + ErrorKind, + Result, +}; + +use types::{ + ColumnConstraint, + ComputedTable, +}; + +impl ConjoiningClauses { + pub fn apply_not_join(&mut self, schema: &Schema, not_join: NotJoin) -> Result<()> { + let unified = match not_join.unify_vars { + UnifyVars::Implicit => not_join.collect_mentioned_variables(), + UnifyVars::Explicit(vs) => vs.into_iter().collect(), + }; + + let mut template = self.use_as_template(&unified); + + for v in unified.iter() { + if self.value_bindings.contains_key(&v) { + let val = self.value_bindings.get(&v).unwrap().clone(); + template.value_bindings.insert(v.clone(), val); + } else if self.column_bindings.contains_key(&v) { + let col = self.column_bindings.get(&v).unwrap()[0].clone(); + template.column_bindings.insert(v.clone(), vec![col]); + } else { + bail!(ErrorKind::UnboundVariable(v.name())); + } + } + + for clause in not_join.clauses.into_iter() { + template.apply_clause(&schema, clause)?; + } + + if template.is_known_empty() { + return Ok(()); + } + + // We are only expanding column bindings here and not pruning extracted types as we are not projecting values. 
+ template.expand_column_bindings(); + + let subquery = ComputedTable::Subquery(template); + + self.wheres.add_intersection(ColumnConstraint::NotExists(subquery)); + + Ok(()) + } +} + +#[cfg(test)] +mod testing { + extern crate mentat_query_parser; + + use std::rc::Rc; + use std::collections::BTreeSet; + + use super::*; + + use mentat_core::{ + Attribute, + TypedValue, + ValueType, + }; + + use mentat_query::{ + NamespacedKeyword, + PlainSymbol, + Variable + }; + + use self::mentat_query_parser::parse_find_string; + + use clauses::{ + QueryInputs, + add_attribute, + associate_ident, + }; + + use errors::{ + Error, + ErrorKind, + }; + + use types::{ + ColumnAlternation, + ColumnConstraint, + ColumnConstraintOrAlternation, + ColumnIntersection, + DatomsColumn, + DatomsTable, + NumericComparison, + QualifiedAlias, + QueryValue, + SourceAlias, + ValueTypeSet, + }; + + use { + algebrize, + algebrize_with_inputs, + }; + + fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("parse failed"); + algebrize(schema.into(), parsed).expect("algebrize failed").cc + } + + fn alg_with_inputs(schema: &Schema, input: &str, inputs: QueryInputs) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("parse failed"); + algebrize_with_inputs(schema.into(), parsed, 0, inputs).expect("algebrize failed").cc + } + + fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "knows"), 66); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, + 65, + Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, + 66, 
+ Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, + 67, + Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, + 68, + Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, + 69, + Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema + } + + fn compare_ccs(left: ConjoiningClauses, right: ConjoiningClauses) { + assert_eq!(left.wheres, right.wheres); + assert_eq!(left.from, right.from); + } + + // not. + #[test] + fn test_successful_not() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where [?x :foo/knows "John"] + (not [?x :foo/parent "Ámbar"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + + let vx = Variable::from_valid_name("?x"); + + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); + + let d1 = "datoms01".to_string(); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); + + let d2 = "datoms02".to_string(); + let d2e = QualifiedAlias::new(d2.clone(), DatomsColumn::Entity); + let d2a = QualifiedAlias::new(d2.clone(), DatomsColumn::Attribute); + let d2v = QualifiedAlias::new(d2.clone(), DatomsColumn::Value); + + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let ambar = QueryValue::TypedValue(TypedValue::typed_string("Ámbar")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + let mut subquery = 
ConjoiningClauses::default(); + subquery.from = vec![SourceAlias(DatomsTable::Datoms, d1), + SourceAlias(DatomsTable::Datoms, d2)]; + subquery.column_bindings.insert(vx.clone(), vec![d0e.clone(), d1e.clone(), d2e.clone()]); + subquery.wheres = ColumnIntersection(vec![ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), ambar)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2v.clone(), daphne)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d2e.clone())))]); + + subquery.known_types.insert(vx.clone(), ValueTypeSet::of_one(ValueType::Ref)); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), john)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NotExists(ComputedTable::Subquery(subquery))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0)]); + } + + // not-join. 
+ #[test] + fn test_successful_not_join() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where [?x :foo/knows ?y] + [?x :foo/age 11] + [?x :foo/name "John"] + (not-join [?x ?y] + [?x :foo/parent ?y])]"#; + let cc = alg(&schema, query); + + let vx = Variable::from_valid_name("?x"); + let vy = Variable::from_valid_name("?y"); + + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); + + let d1 = "datoms01".to_string(); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); + + let d2 = "datoms02".to_string(); + let d2e = QualifiedAlias::new(d2.clone(), DatomsColumn::Entity); + let d2a = QualifiedAlias::new(d2.clone(), DatomsColumn::Attribute); + let d2v = QualifiedAlias::new(d2.clone(), DatomsColumn::Value); + + let d3 = "datoms03".to_string(); + let d3e = QualifiedAlias::new(d3.clone(), DatomsColumn::Entity); + let d3a = QualifiedAlias::new(d3.clone(), DatomsColumn::Attribute); + let d3v = QualifiedAlias::new(d3.clone(), DatomsColumn::Value); + + let name = QueryValue::Entid(65); + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + let age = QueryValue::Entid(68); + + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let eleven = QueryValue::PrimitiveLong(11); + + let mut subquery = ConjoiningClauses::default(); + subquery.from = vec![SourceAlias(DatomsTable::Datoms, d3)]; + subquery.column_bindings.insert(vx.clone(), vec![d0e.clone(), d3e.clone()]); + subquery.column_bindings.insert(vy.clone(), vec![d0v.clone(), d3v.clone()]); + subquery.wheres = ColumnIntersection(vec![ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d3a.clone(), parent)), + 
ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d3e.clone()))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), QueryValue::Column(d3v.clone())))]); + + subquery.known_types.insert(vx.clone(), ValueTypeSet::of_one(ValueType::Ref)); + subquery.known_types.insert(vy.clone(), ValueTypeSet::of_one(ValueType::String)); + + assert!(!cc.is_known_empty()); + let expected_wheres = ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), age.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), eleven)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2a.clone(), name.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2v.clone(), john)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NotExists(ComputedTable::Subquery(subquery))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d2e.clone()))), + ]); + assert_eq!(cc.wheres, expected_wheres); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e, d2e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), + SourceAlias(DatomsTable::Datoms, d1), + SourceAlias(DatomsTable::Datoms, d2)]); + } + + // Not with a pattern and a predicate. 
+ #[test] + fn test_not_with_pattern_and_predicate() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x ?age + :where + [?x :foo/age ?age] + [[< ?age 30]] + (not [?x :foo/knows "John"] + [?x :foo/knows "Daphne"])]"#; + let cc = alg(&schema, query); + + let vx = Variable::from_valid_name("?x"); + + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); + + let d1 = "datoms01".to_string(); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); + + let d2 = "datoms02".to_string(); + let d2e = QualifiedAlias::new(d2.clone(), DatomsColumn::Entity); + let d2a = QualifiedAlias::new(d2.clone(), DatomsColumn::Attribute); + let d2v = QualifiedAlias::new(d2.clone(), DatomsColumn::Value); + + let knows = QueryValue::Entid(66); + let age = QueryValue::Entid(68); + + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + let mut subquery = ConjoiningClauses::default(); + subquery.from = vec![SourceAlias(DatomsTable::Datoms, d1), + SourceAlias(DatomsTable::Datoms, d2)]; + subquery.column_bindings.insert(vx.clone(), vec![d0e.clone(), d1e.clone(), d2e.clone()]); + subquery.wheres = ColumnIntersection(vec![ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2v.clone(), daphne.clone())), + 
ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d2e.clone())))]); + + subquery.known_types.insert(vx.clone(), ValueTypeSet::of_one(ValueType::Ref)); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), age.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NumericInequality { + operator: NumericComparison::LessThan, + left: QueryValue::Column(d0v.clone()), + right: QueryValue::TypedValue(TypedValue::Long(30)), + }), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NotExists(ComputedTable::Subquery(subquery))), + ])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e])); + assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0)]); + } + + // not with an or + #[test] + fn test_not_with_or() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where [?x :foo/knows "Bill"] + (not (or [?x :foo/knows "John"] + [?x :foo/knows "Ámbar"]) + [?x :foo/parent "Daphne"])]"#; + let cc = alg(&schema, query); + + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); + + let d1 = "datoms01".to_string(); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); + + let d2 = "datoms02".to_string(); + let d2e = QualifiedAlias::new(d2.clone(), DatomsColumn::Entity); + let d2a = QualifiedAlias::new(d2.clone(), DatomsColumn::Attribute); + let d2v = QualifiedAlias::new(d2.clone(), DatomsColumn::Value); + + let vx = 
Variable::from_valid_name("?x"); + + let knows = QueryValue::Entid(66); + let parent = QueryValue::Entid(67); + + let bill = QueryValue::TypedValue(TypedValue::typed_string("Bill")); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + let ambar = QueryValue::TypedValue(TypedValue::typed_string("Ámbar")); + let daphne = QueryValue::TypedValue(TypedValue::typed_string("Daphne")); + + + let mut subquery = ConjoiningClauses::default(); + subquery.from = vec![SourceAlias(DatomsTable::Datoms, d1), + SourceAlias(DatomsTable::Datoms, d2)]; + subquery.column_bindings.insert(vx.clone(), vec![d0e.clone(), d1e.clone(), d2e.clone()]); + subquery.wheres = ColumnIntersection(vec![ColumnConstraintOrAlternation::Alternation(ColumnAlternation(vec![ + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john))]), + ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), ambar))]), + ])), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2a.clone(), parent)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d2v.clone(), daphne)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d2e.clone())))]); + + subquery.known_types.insert(vx.clone(), ValueTypeSet::of_one(ValueType::Ref)); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), bill)), + 
ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NotExists(ComputedTable::Subquery(subquery))), + ])); + } + + // not-join with an input variable + #[test] + fn test_not_with_in() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :in ?y + :where [?x :foo/knows "Bill"] + (not [?x :foo/knows ?y])]"#; + + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?y"),TypedValue::String(Rc::new("John".to_string())))]); + let cc = alg_with_inputs(&schema, query, inputs); + + let vx = Variable::from_valid_name("?x"); + let vy = Variable::from_valid_name("?y"); + + let knows = QueryValue::Entid(66); + + let bill = QueryValue::TypedValue(TypedValue::typed_string("Bill")); + let john = QueryValue::TypedValue(TypedValue::typed_string("John")); + + let d0 = "datoms00".to_string(); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); + + let d1 = "datoms01".to_string(); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); + + let mut subquery = ConjoiningClauses::default(); + subquery.from = vec![SourceAlias(DatomsTable::Datoms, d1)]; + subquery.column_bindings.insert(vx.clone(), vec![d0e.clone(), d1e.clone()]); + subquery.wheres = ColumnIntersection(vec![ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), knows.clone())), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1v.clone(), john)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone())))]); + + subquery.known_types.insert(vx.clone(), ValueTypeSet::of_one(ValueType::Ref)); + subquery.known_types.insert(vy.clone(), ValueTypeSet::of_one(ValueType::String)); + + let 
mut input_vars: BTreeSet<Variable> = BTreeSet::default(); + input_vars.insert(vy.clone()); + subquery.input_variables = input_vars; + subquery.value_bindings.insert(vy.clone(), TypedValue::typed_string("John")); + + assert!(!cc.is_known_empty()); + assert_eq!(cc.wheres, ColumnIntersection(vec![ + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0v.clone(), bill)), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::NotExists(ComputedTable::Subquery(subquery))), + ])); + } + + // Test that if any single clause in the `not` fails to resolve, the whole `not` is known to be empty and is dropped from the query. + #[test] + fn test_fails_if_any_clause_invalid() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where [?x :foo/knows "Bill"] + (not [?x :foo/nope "John"] + [?x :foo/parent "Ámbar"] + [?x :foo/nope "Daphne"])]"#; + let cc = alg(&schema, query); + assert!(!cc.is_known_empty()); + compare_ccs(cc, + alg(&schema, + r#"[:find ?x :where [?x :foo/knows "Bill"]]"#)); + } + + /// Test that if all the attributes in a `not` fail to resolve, the `cc` isn't considered empty. 
+ #[test] + fn test_no_clauses_succeed() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :where [?x :foo/knows "John"] + (not [?x :foo/nope "Ámbar"] + [?x :foo/nope "Daphne"])]"#; + let cc = alg(&schema, query); + assert!(!cc.is_known_empty()); + compare_ccs(cc, + alg(&schema, r#"[:find ?x :where [?x :foo/knows "John"]]"#)); + + } + + #[test] + fn test_unbound_var_fails() { + let schema = prepopulated_schema(); + let query = r#" + [:find ?x + :in ?y + :where (not [?x :foo/knows ?y])]"#; + let parsed = parse_find_string(query).expect("parse failed"); + let err = algebrize(&schema, parsed).err(); + assert!(err.is_some()); + match err.unwrap() { + Error(ErrorKind::UnboundVariable(var), _) => { assert_eq!(var, PlainSymbol("?x".to_string())); }, + x => panic!("expected Unbound Variable error, got {:?}", x), + } + } +} diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index 2434bd3c..b76f5ac5 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -57,6 +57,12 @@ error_chain! { description("non-matching variables in 'or' clause") display("non-matching variables in 'or' clause") } + + NonMatchingVariablesInNotClause { + // TODO: flesh out. + description("non-matching variables in 'not' clause") + display("non-matching variables in 'not' clause") + } } } diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index a9988a67..e26006c9 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -45,8 +45,9 @@ pub enum DatomsTable { } /// A source of rows that isn't a named table -- typically a subquery or union. 
+#[derive(PartialEq, Eq, Debug)] pub enum ComputedTable { - // Subquery(BTreeSet, ::clauses::ConjoiningClauses), + Subquery(::clauses::ConjoiningClauses), Union { projection: BTreeSet, type_extraction: BTreeSet, @@ -294,6 +295,7 @@ pub enum ColumnConstraint { right: QueryValue, }, HasType(TableAlias, ValueType), + NotExists(ComputedTable), } #[derive(PartialEq, Eq, Debug)] @@ -407,6 +409,9 @@ impl Debug for ColumnConstraint { &HasType(ref qa, value_type) => { write!(f, "{:?}.value_type_tag = {:?}", qa, value_type) }, + &NotExists(ref ct) => { + write!(f, "NOT EXISTS {:?}", ct) + }, } } } diff --git a/query-algebrizer/src/validate.rs b/query-algebrizer/src/validate.rs index d10ff237..82fcc327 100644 --- a/query-algebrizer/src/validate.rs +++ b/query-algebrizer/src/validate.rs @@ -13,6 +13,7 @@ use std::collections::BTreeSet; use mentat_query::{ ContainsVariables, OrJoin, + NotJoin, Variable, UnifyVars, }; @@ -74,6 +75,23 @@ pub fn validate_or_join(or_join: &OrJoin) -> Result<()> { } } +pub fn validate_not_join(not_join: &NotJoin) -> Result<()> { + // Grab our mentioned variables and ensure that the rules are followed. + match not_join.unify_vars { + UnifyVars::Implicit => { + Ok(()) + }, + UnifyVars::Explicit(ref vars) => { + // The joined vars must each appear somewhere in the clause's mentioned variables. 
+ let var_set: BTreeSet<Variable> = vars.iter().cloned().collect(); + if !var_set.is_subset(&not_join.collect_mentioned_variables()) { + bail!(ErrorKind::NonMatchingVariablesInNotClause); + } + Ok(()) + }, + } +} + #[cfg(test)] mod tests { extern crate mentat_core; @@ -96,7 +114,10 @@ mod tests { use clauses::ident; - use super::validate_or_join; + use super::{ + validate_not_join, + validate_or_join, + }; fn value_ident(ns: &str, name: &str) -> PatternValuePlace { PatternValuePlace::IdentOrKeyword(::std::rc::Rc::new(NamespacedKeyword::new(ns, name))) @@ -229,4 +250,129 @@ mod tests { _ => panic!(), }; } + + + /// Tests that the top-level form is a valid `not`, returning the clauses. + fn valid_not_join(parsed: FindQuery, expected_unify: UnifyVars) -> Vec<WhereClause> { + // Filter out all the clauses that are not `not`s. + let mut nots = parsed.where_clauses.into_iter().filter(|x| match x { + &WhereClause::NotJoin(_) => true, + _ => false, + }); + + // There should be only one not clause. + let clause = nots.next().unwrap(); + assert_eq!(None, nots.next()); + + match clause { + WhereClause::NotJoin(not_join) => { + // It's valid: any explicitly joined variables all appear in the body of the `not`. + assert_eq!((), validate_not_join(&not_join).unwrap()); + assert_eq!(expected_unify, not_join.unify_vars); + not_join.clauses + }, + _ => panic!(), + } + } + + /// Test that a `not` is valid if it is implicit. 
+ #[test] + fn test_success_not() { + let query = r#"[:find ?name + :where [?id :artist/name ?name] + (not [?id :artist/country :country/CA] + [?id :artist/country :country/GB])]"#; + let parsed = parse_find_string(query).expect("expected successful parse"); + let clauses = valid_not_join(parsed, UnifyVars::Implicit); + + let id = PatternNonValuePlace::Variable(Variable::from_valid_name("?id")); + let artist_country = ident("artist", "country"); + // Check each part of the body + let mut parts = clauses.into_iter(); + match (parts.next(), parts.next(), parts.next()) { + (Some(clause1), Some(clause2), None) => { + assert_eq!( + clause1, + WhereClause::Pattern(Pattern { + source: None, + entity: id.clone(), + attribute: artist_country.clone(), + value: value_ident("country", "CA"), + tx: PatternNonValuePlace::Placeholder, + })); + assert_eq!( + clause2, + WhereClause::Pattern(Pattern { + source: None, + entity: id, + attribute: artist_country, + value: value_ident("country", "GB"), + tx: PatternNonValuePlace::Placeholder, + })); + }, + _ => panic!(), + }; + } + + #[test] + fn test_success_not_join() { + let query = r#"[:find ?artist + :where [?artist :artist/name] + (not-join [?artist] + [?release :release/artists ?artist] + [?release :release/year 1970])]"#; + let parsed = parse_find_string(query).expect("expected successful parse"); + let clauses = valid_not_join(parsed, UnifyVars::Explicit(vec![Variable::from_valid_name("?artist")])); + + let release = PatternNonValuePlace::Variable(Variable::from_valid_name("?release")); + let artist = PatternValuePlace::Variable(Variable::from_valid_name("?artist")); + // Let's do some detailed parse checks. 
+ let mut parts = clauses.into_iter(); + match (parts.next(), parts.next(), parts.next()) { + (Some(clause1), Some(clause2), None) => { + assert_eq!( + clause1, + WhereClause::Pattern(Pattern { + source: None, + entity: release.clone(), + attribute: ident("release", "artists"), + value: artist, + tx: PatternNonValuePlace::Placeholder, + })); + assert_eq!( + clause2, + WhereClause::Pattern(Pattern { + source: None, + entity: release, + attribute: ident("release", "year"), + value: PatternValuePlace::EntidOrInteger(1970), + tx: PatternNonValuePlace::Placeholder, + })); + }, + _ => panic!(), + }; + } + + /// Test that a `not-join` that does not use the joining var fails to validate. + #[test] + fn test_invalid_explicit_not_join_non_matching_join_vars() { + let query = r#"[:find ?artist + :where [?artist :artist/name] + (not-join [?artist] + [?release :release/artists "Pink Floyd"] + [?release :release/year 1970])]"#; + let parsed = parse_find_string(query).expect("expected successful parse"); + let mut nots = parsed.where_clauses.iter().filter(|&x| match *x { + WhereClause::NotJoin(_) => true, + _ => false, + }); + + let clause = nots.next().unwrap().clone(); + assert_eq!(None, nots.next()); + + match clause { + WhereClause::NotJoin(not_join) => assert!(validate_not_join(&not_join).is_err()), + _ => panic!(), + } + } } \ No newline at end of file diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index f7e73c1d..b9c36077 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -47,6 +47,7 @@ use self::mentat_query::{ Order, OrJoin, OrWhereClause, + NotJoin, Pattern, PatternNonValuePlace, PatternValuePlace, @@ -191,6 +192,10 @@ def_matches_plain_symbol!(Where, or, "or"); def_matches_plain_symbol!(Where, or_join, "or-join"); +def_matches_plain_symbol!(Where, not, "not"); + +def_matches_plain_symbol!(Where, not_join, "not-join"); + def_parser!(Where, rule_vars, Vec<Variable>, { seq() .of_exactly(many1(Query::variable())) @@ -230,6 +235,33 @@ 
def_parser!(Where, or_join_clause, WhereClause, {
             }))
 });
 
+def_parser!(Where, not_clause, WhereClause, {
+    seq()
+        .of_exactly(Where::not()
+            .with(many1(Where::clause()))
+            .map(|clauses| {
+                WhereClause::NotJoin(
+                    NotJoin {
+                        unify_vars: UnifyVars::Implicit,
+                        clauses: clauses,
+                    })
+            }))
+});
+
+def_parser!(Where, not_join_clause, WhereClause, {
+    seq()
+        .of_exactly(Where::not_join()
+            .with(Where::rule_vars())
+            .and(many1(Where::clause()))
+            .map(|(vars, clauses)| {
+                WhereClause::NotJoin(
+                    NotJoin {
+                        unify_vars: UnifyVars::Explicit(vars),
+                        clauses: clauses,
+                    })
+            }))
+});
+
 /// A vector containing just a parenthesized filter expression.
 def_parser!(Where, pred, WhereClause, {
     // Accept either a nested list or a nested vector here:
@@ -294,6 +326,8 @@ def_parser!(Where, clause, WhereClause, {
             // We don't yet handle source vars.
             try(Where::or_join_clause()),
             try(Where::or_clause()),
+            try(Where::not_join_clause()),
+            try(Where::not_clause()),
 
             try(Where::pred()),
     ])
@@ -671,6 +705,49 @@ mod test {
                                       }))])));
     }
 
+    #[test]
+    fn test_not() {
+        let e = edn::PlainSymbol::new("?e");
+        let a = edn::PlainSymbol::new("?a");
+        let v = edn::PlainSymbol::new("?v");
+
+        assert_edn_parses_to!(Where::not_clause,
+                              "(not [?e ?a ?v])",
+                              WhereClause::NotJoin(
+                                  NotJoin {
+                                      unify_vars: UnifyVars::Implicit,
+                                      clauses: vec![
+                                          WhereClause::Pattern(Pattern {
+                                              source: None,
+                                              entity: PatternNonValuePlace::Variable(variable(e)),
+                                              attribute: PatternNonValuePlace::Variable(variable(a)),
+                                              value: PatternValuePlace::Variable(variable(v)),
+                                              tx: PatternNonValuePlace::Placeholder,
+                                          })],
+                                  }));
+    }
+
+    #[test]
+    fn test_not_join() {
+        let e = edn::PlainSymbol::new("?e");
+        let a = edn::PlainSymbol::new("?a");
+        let v = edn::PlainSymbol::new("?v");
+
+        assert_edn_parses_to!(Where::not_join_clause,
+                              "(not-join [?e] [?e ?a ?v])",
+                              WhereClause::NotJoin(
+                                  NotJoin {
+                                      unify_vars: UnifyVars::Explicit(vec![variable(e.clone())]),
+                                      clauses: vec![WhereClause::Pattern(Pattern {
+                                          source: None,
+                                          entity: PatternNonValuePlace::Variable(variable(e)),
+                                          attribute: PatternNonValuePlace::Variable(variable(a)),
+                                          value: PatternValuePlace::Variable(variable(v)),
+                                          tx: PatternNonValuePlace::Placeholder,
+                                      })],
+                                  }));
+    }
+
     #[test]
     fn test_find_sp_variable() {
         let sym = edn::PlainSymbol::new("?x");
diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs
index 28fb2ddc..6ca151a3 100644
--- a/query-sql/src/lib.rs
+++ b/query-sql/src/lib.rs
@@ -14,6 +14,8 @@ extern crate mentat_query;
 extern crate mentat_query_algebrizer;
 extern crate mentat_sql;
 
+use std::boxed::Box;
+
 use mentat_core::{
     Entid,
     TypedValue,
@@ -100,6 +102,9 @@ pub enum Constraint {
     In {
         left: ColumnOrExpression,
         list: Vec<ColumnOrExpression>,
+    },
+    NotExists {
+        subquery: TableOrSubquery,
     }
 }
 
@@ -146,7 +151,7 @@ pub struct Join {
 pub enum TableOrSubquery {
     Table(SourceAlias),
     Union(Vec<SelectQuery>, TableAlias),
-    // TODO: Subquery.
+    Subquery(Box<SelectQuery>),
 }
 
 pub enum FromClause {
@@ -326,6 +331,12 @@ impl QueryFragment for Constraint {
                 out.push_sql(")");
                 Ok(())
             },
+            &NotExists { ref subquery } => {
+                out.push_sql("NOT EXISTS (");
+                subquery.push_sql(out)?;
+                out.push_sql(")");
+                Ok(())
+            }
         }
     }
 }
@@ -379,6 +390,10 @@ impl QueryFragment for TableOrSubquery {
                 out.push_sql(") AS ");
                 out.push_identifier(table_alias.as_str())
             },
+            &Subquery(ref subquery) => {
+                subquery.push_sql(out)?;
+                Ok(())
+            },
         }
     }
 }
diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs
index 6de63955..17c5d00e 100644
--- a/query-translator/src/translate.rs
+++ b/query-translator/src/translate.rs
@@ -8,6 +8,8 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
+use std::rc::Rc;
+
 use mentat_core::{
     SQLValueType,
     TypedValue,
@@ -150,6 +152,13 @@ impl ToConstraint for ColumnConstraint {
                 let column = QualifiedAlias::new(table, DatomsColumn::ValueTypeTag).to_column();
                 Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag()))
             },
+
+            NotExists(computed_table) => {
+                let subquery = table_for_computed(computed_table, TableAlias::new());
+                Constraint::NotExists {
+                    subquery: subquery,
+                }
+            },
         }
     }
 }
@@ -230,6 +239,9 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
                 }).collect(),
                 alias)
         },
+        ComputedTable::Subquery(subquery) => {
+            TableOrSubquery::Subquery(Box::new(cc_to_exists(subquery)))
+        }
     }
 }
 
@@ -268,7 +280,6 @@ fn cc_to_select_query(projection: Projection,
         FromClause::TableList(TableList(tables.collect()))
     };
 
-    // Turn the query-centric order clauses into column-orders.
     let order = order.map_or(vec![], |vec| { vec.into_iter().map(|o| o.into()).collect() });
     let limit = if cc.empty_because.is_some() { Limit::Fixed(0) } else { limit };
     SelectQuery {
@@ -295,10 +306,10 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery {
             from: FromClause::Nothing,
             constraints: vec![],
             order: vec![],
-            limit: Limit::Fixed(0),
+            limit: Limit::None,
         }
     } else {
-        cc_to_select_query(Projection::One, cc, false, None, Limit::Fixed(1))
+        cc_to_select_query(Projection::One, cc, false, None, Limit::None)
     }
 }
 
diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs
index fef09d2a..a13c2fa7 100644
--- a/query-translator/tests/translate.rs
+++ b/query-translator/tests/translate.rs
@@ -395,7 +395,6 @@ fn test_complex_or_join() {
                           make_arg("$v2", "Foo")]);
 }
 
-
 #[test]
 fn test_complex_or_join_type_projection() {
     let mut schema = Schema::default();
@@ -427,6 +426,61 @@ fn test_complex_or_join_type_projection() {
     assert_eq!(args, vec![]);
 }
 
+#[test]
+fn test_not() {
+    let mut schema = Schema::default();
+    associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97);
+    associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98);
+    associate_ident(&mut schema, NamespacedKeyword::new("page", "bookmarked"), 99);
+    for x in 97..99 {
+        add_attribute(&mut schema, x, Attribute {
+            value_type: ValueType::String,
+            ..Default::default()
+        });
+    }
+    add_attribute(&mut schema, 99, Attribute {
+        value_type: ValueType::Boolean,
+        ..Default::default()
+    });
+
+    let query = r#"[:find ?title
+        :where [?page :page/title ?title]
+               (not [?page :page/url "http://foo.com/"]
+                    [?page :page/bookmarked true])]"#;
+    let SQLQuery { sql, args } = translate(&schema, query);
+    assert_eq!(sql, "SELECT DISTINCT `datoms00`.v AS `?title` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 98 AND NOT EXISTS (SELECT 1 FROM `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE `datoms01`.a = 97 AND `datoms01`.v = $v0 AND `datoms02`.a = 99 AND `datoms02`.v = 1 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e)");
+    assert_eq!(args, vec![make_arg("$v0", "http://foo.com/")]);
+}
+
+#[test]
+fn test_not_join() {
+    let mut schema = Schema::default();
+    associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97);
+    associate_ident(&mut schema, NamespacedKeyword::new("bookmarks", "page"), 98);
+    associate_ident(&mut schema, NamespacedKeyword::new("bookmarks", "date_created"), 99);
+    add_attribute(&mut schema, 97, Attribute {
+        value_type: ValueType::String,
+        ..Default::default()
+    });
+    add_attribute(&mut schema, 98, Attribute {
+        value_type: ValueType::Ref,
+        ..Default::default()
+    });
+    add_attribute(&mut schema, 99, Attribute {
+        value_type: ValueType::String,
+        ..Default::default()
+    });
+
+    let query = r#"[:find ?url
+        :where [?url :page/url]
+               (not-join [?url]
+                   [?page :bookmarks/page ?url]
+                   [?page :bookmarks/date_created "4/4/2017"])]"#;
+    let SQLQuery { sql, args } = translate(&schema, query);
+    assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?url` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 97 AND NOT EXISTS (SELECT 1 FROM `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE `datoms01`.a = 98 AND `datoms02`.a = 99 AND `datoms02`.v = $v0 AND `datoms01`.e = `datoms02`.e AND `datoms00`.e = `datoms01`.v)");
+    assert_eq!(args, vec![make_arg("$v0", "4/4/2017")]);
+}
+
 #[test]
 fn test_with_without_aggregate() {
     let schema = prepopulated_schema();
diff --git a/query/src/lib.rs b/query/src/lib.rs
index a0b1500d..cf102545 100644
--- a/query/src/lib.rs
+++ b/query/src/lib.rs
@@ -597,11 +597,16 @@ pub struct OrJoin {
     mentioned_vars: Option<BTreeSet<Variable>>,
 }
 
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct NotJoin {
+    pub unify_vars: UnifyVars,
+    pub clauses: Vec<WhereClause>,
+}
+
 #[allow(dead_code)]
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum WhereClause {
-    Not,
-    NotJoin,
+    NotJoin(NotJoin),
     OrJoin(OrJoin),
     Pred(Predicate),
     WhereFn,
@@ -669,8 +674,7 @@ impl ContainsVariables for WhereClause {
             &OrJoin(ref o) => o.accumulate_mentioned_variables(acc),
             &Pred(ref p) => p.accumulate_mentioned_variables(acc),
             &Pattern(ref p) => p.accumulate_mentioned_variables(acc),
-            &Not => (),
-            &NotJoin => (),
+            &NotJoin(ref n) => n.accumulate_mentioned_variables(acc),
             &WhereFn => (),
             &RuleExpr => (),
         }
@@ -717,6 +721,14 @@ impl OrJoin {
     }
 }
 
+impl ContainsVariables for NotJoin {
+    fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet<Variable>) {
+        for clause in &self.clauses {
+            clause.accumulate_mentioned_variables(acc);
+        }
+    }
+}
+
 impl ContainsVariables for Predicate {
     fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet<Variable>) {
         for arg in &self.args {