From 5cd53aff4429a6f503e79957c7a0273e895f2a33 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 14:18:41 -0700 Subject: [PATCH 1/9] Pre: unused imports. --- query-translator/src/translate.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index ded2a78c..9161ac53 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -14,11 +14,6 @@ use mentat_core::{ ValueType, }; -use mentat_query::{ - Direction, - Variable, -}; - use mentat_query_algebrizer::{ AlgebraicQuery, ColumnAlternation, From 5718ce01555ff05c001202c402ab88cd0fe1d36f Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 13:29:40 -0700 Subject: [PATCH 2/9] Pre: add two checks to translate tests to fix unused var warning. --- query-translator/tests/translate.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index f0c4d6f9..452cce0d 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -382,6 +382,7 @@ fn test_order_by() { FROM `datoms` AS `datoms00` \ WHERE `datoms00`.a = 99 \ ORDER BY `?y` DESC"); + assert_eq!(args, vec![]); // Unknown type. let input = r#"[:find ?x :with ?y :where [?x _ ?y] :order ?y ?x]"#; @@ -390,4 +391,5 @@ fn test_order_by() { `all_datoms00`.value_type_tag AS `?y_value_type_tag` \ FROM `all_datoms` AS `all_datoms00` \ ORDER BY `?y_value_type_tag` ASC, `?y` ASC, `?x` ASC"); + assert_eq!(args, vec![]); } \ No newline at end of file From 8ddbc834aec63ac120c8642c40ae56c2047300bb Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 13:30:35 -0700 Subject: [PATCH 3/9] Pre: take Variables instead of Strings in public API, for now. --- src/conn.rs | 10 ++++++++-- src/query.rs | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/conn.rs b/src/conn.rs index 0aa642f0..340e0210 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -16,18 +16,24 @@ use std::sync::{Arc, Mutex}; use rusqlite; use edn; -use errors::*; + use mentat_core::{ Schema, TypedValue, }; + use mentat_db::db; use mentat_db::{ transact, PartitionMap, TxReport, }; + +use mentat_query::Variable; + use mentat_tx_parser; + +use errors::*; use query::{ q_once, QueryResults, @@ -112,7 +118,7 @@ impl Conn { query: &str, inputs: T, limit: U) -> Result - where T: Into>>, + where T: Into>>, U: Into> { diff --git a/src/query.rs b/src/query.rs index d1dbd0e2..89515499 100644 --- a/src/query.rs +++ b/src/query.rs @@ -23,6 +23,7 @@ use mentat_query_algebrizer::algebrize; pub use mentat_query::{ NamespacedKeyword, PlainSymbol, + Variable, }; use mentat_query_parser::{ @@ -59,7 +60,7 @@ pub fn q_once<'sqlite, 'schema, 'query, T, U> query: &'query str, inputs: T, limit: U) -> QueryExecutionResult - where T: Into>>, + where T: Into>>, U: Into> { // TODO: validate inputs. From 01af45ab3f5aea7b421ee5fa1b6ab64c89918bf9 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 21:09:45 -0700 Subject: [PATCH 4/9] Pre: define Display for ValueType. --- core/src/lib.rs | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/core/src/lib.rs b/core/src/lib.rs index a6390a8d..67b0aa63 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -17,6 +17,7 @@ extern crate edn; pub mod values; use std::collections::BTreeMap; +use std::fmt; use std::rc::Rc; use self::ordered_float::OrderedFloat; use self::edn::NamespacedKeyword; @@ -44,19 +45,33 @@ pub enum ValueType { } impl ValueType { - pub fn to_edn_value(&self) -> edn::Value { + pub fn to_edn_value(self) -> edn::Value { match self { - &ValueType::Ref => values::DB_TYPE_REF.clone(), - &ValueType::Boolean => values::DB_TYPE_BOOLEAN.clone(), - &ValueType::Instant => values::DB_TYPE_INSTANT.clone(), - &ValueType::Long => values::DB_TYPE_LONG.clone(), - &ValueType::Double => values::DB_TYPE_DOUBLE.clone(), - &ValueType::String => values::DB_TYPE_STRING.clone(), - &ValueType::Keyword => values::DB_TYPE_KEYWORD.clone(), + ValueType::Ref => values::DB_TYPE_REF.clone(), + ValueType::Boolean => values::DB_TYPE_BOOLEAN.clone(), + ValueType::Instant => values::DB_TYPE_INSTANT.clone(), + ValueType::Long => values::DB_TYPE_LONG.clone(), + ValueType::Double => values::DB_TYPE_DOUBLE.clone(), + ValueType::String => values::DB_TYPE_STRING.clone(), + ValueType::Keyword => values::DB_TYPE_KEYWORD.clone(), } } } +impl fmt::Display for ValueType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", match *self { + ValueType::Ref => "db.type/ref", + ValueType::Boolean => "db.type/boolean", + ValueType::Instant => "db.type/instant", + ValueType::Long => "db.type/long", + ValueType::Double => "db.type/double", + ValueType::String => "db.type/string", + ValueType::Keyword => "db.type/keyword", + }) + } +} + /// Represents a Mentat value in a particular value set. // TODO: expand to include :db.type/{instant,url,uuid}. // TODO: BigInt? From a9a82ea1a77e880310b071a02520d22a644018d5 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 18:46:40 -0700 Subject: [PATCH 5/9] Part 1: parse :in. We also at this point switch from using `Vec` to `BTreeSet`. This allows us to guarantee no duplicates later; we'll reject duplicates at parse time. --- query-parser/src/parse.rs | 42 ++++++++++++++++++++++++++++++++------- query/src/lib.rs | 21 ++++++++++++++------ 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 5b4d879d..2952afdc 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -15,6 +15,8 @@ extern crate mentat_query; use std; // To refer to std::result::Result. +use std::collections::BTreeSet; + use self::combine::{eof, many, many1, optional, parser, satisfy, satisfy_map, Parser, ParseResult, Stream}; use self::combine::combinator::{choice, or, try}; @@ -65,6 +67,11 @@ error_chain! { } errors { + DuplicateVariableError { + description("duplicate variable") + display("duplicates in variable list") + } + NotAVariableError(value: edn::ValueAndSpan) { description("not a variable") display("not a variable: '{}'", value) @@ -328,6 +335,8 @@ def_parser!(Find, spec, FindSpec, { def_matches_keyword!(Find, literal_find, "find"); +def_matches_keyword!(Find, literal_in, "in"); + def_matches_keyword!(Find, literal_with, "with"); def_matches_keyword!(Find, literal_where, "where"); @@ -337,11 +346,26 @@ def_matches_keyword!(Find, literal_order, "order"); /// Express something close to a builder pattern for a `FindQuery`. enum FindQueryPart { FindSpec(FindSpec), - With(Vec), + With(BTreeSet), + In(BTreeSet), WhereClauses(Vec), Order(Vec), } +def_parser!(Find, vars, BTreeSet, { + vector().of_exactly(many(Query::variable()).map(|vars: Vec| { + let given = vars.len(); + let set: BTreeSet = vars.into_iter().collect(); + if given != set.len() { + // TODO: find out what the variable is! + // TODO: figure out how to use `and_then` to return an error here. + panic!(Error::from_kind(ErrorKind::DuplicateVariableError)); + } else { + set + } + })) +}); + /// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find /// queries, then we use `FindQueryPart` to collect parts that have heterogeneous types; and then we /// construct a `FindQuery` from them. @@ -349,8 +373,9 @@ def_parser!(Find, query, FindQuery, { let p_find_spec = Find::literal_find() .with(vector().of_exactly(Find::spec().map(FindQueryPart::FindSpec))); - let p_with_vars = Find::literal_with() - .with(vector().of_exactly(many(Query::variable()).map(FindQueryPart::With))); + let p_in_vars = Find::literal_in().with(Find::vars().map(FindQueryPart::In)); + + let p_with_vars = Find::literal_with().with(Find::vars().map(FindQueryPart::With)); let p_where_clauses = Find::literal_where() .with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses"); @@ -359,14 +384,16 @@ def_parser!(Find, query, FindQuery, { .with(vector().of_exactly(many1(Query::order()).map(FindQueryPart::Order))); (or(map(), keyword_map())) - .of_exactly(many(choice::<[&mut Parser; 4], _>([ + .of_exactly(many(choice::<[&mut Parser; 5], _>([ &mut try(p_find_spec), + &mut try(p_in_vars), &mut try(p_with_vars), &mut try(p_where_clauses), &mut try(p_order_clauses), ]))) .and_then(|parts: Vec| -> std::result::Result> { let mut find_spec = None; + let mut in_vars = None; let mut with_vars = None; let mut where_clauses = None; let mut order_clauses = None; @@ -375,6 +402,7 @@ def_parser!(Find, query, FindQuery, { match part { FindQueryPart::FindSpec(x) => find_spec = Some(x), FindQueryPart::With(x) => with_vars = Some(x), + FindQueryPart::In(x) => in_vars = Some(x), FindQueryPart::WhereClauses(x) => where_clauses = Some(x), FindQueryPart::Order(x) => order_clauses = Some(x), } @@ -383,9 +411,9 @@ def_parser!(Find, query, FindQuery, { Ok(FindQuery { find_spec: find_spec.clone().ok_or(combine::primitives::Error::Unexpected("expected :find".into()))?, default_source: SrcVar::DefaultSrc, - with: with_vars.unwrap_or(vec![]), - in_vars: vec![], // TODO - in_sources: vec![], // TODO + with: with_vars.unwrap_or(BTreeSet::default()), + in_vars: in_vars.unwrap_or(BTreeSet::default()), + in_sources: BTreeSet::default(), // TODO order: order_clauses, where_clauses: where_clauses.ok_or(combine::primitives::Error::Unexpected("expected :where".into()))?, }) diff --git a/query/src/lib.rs b/query/src/lib.rs index 3252c447..3a1c501a 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -33,12 +33,21 @@ extern crate edn; extern crate mentat_core; -use std::collections::BTreeSet; +use std::collections::{ + BTreeMap, + BTreeSet, +}; + use std::fmt; use std::rc::Rc; + use edn::{BigInt, OrderedFloat}; pub use edn::{NamespacedKeyword, PlainSymbol}; -use mentat_core::TypedValue; + +use mentat_core::{ + TypedValue, + ValueType, +}; pub type SrcVarName = String; // Do not include the required syntactic '$'. @@ -138,7 +147,7 @@ pub enum Direction { #[derive(Clone, Debug, Eq, PartialEq)] pub struct Order(pub Direction, pub Variable); // Future: Element instead of Variable? -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum SrcVar { DefaultSrc, NamedSrc(SrcVarName), @@ -600,9 +609,9 @@ pub enum WhereClause { pub struct FindQuery { pub find_spec: FindSpec, pub default_source: SrcVar, - pub with: Vec, - pub in_vars: Vec, - pub in_sources: Vec, + pub with: BTreeSet, + pub in_vars: BTreeSet, + pub in_sources: BTreeSet, pub where_clauses: Vec, pub order: Option>, // TODO: in_rules; From 651308f7218f2859aa05b1b6d224db486dd081d0 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 21:09:18 -0700 Subject: [PATCH 6/9] Part 2: define a type to encapsulate query inputs. This is for two reasons. Firstly, we need to track the types of inputs, their values, and also the input variables; adding a struct gives us a little more clarity. Secondly, when we come to implement prepared statements, we'll be algebrizing queries without having the values available. We'll be able to do a better job of algebrizing, and also do more validating, if we allow callers to specify the types of variables in advance, even if the values aren't known. --- query-algebrizer/src/clauses/inputs.rs | 72 ++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 query-algebrizer/src/clauses/inputs.rs diff --git a/query-algebrizer/src/clauses/inputs.rs b/query-algebrizer/src/clauses/inputs.rs new file mode 100644 index 00000000..55f8daa1 --- /dev/null +++ b/query-algebrizer/src/clauses/inputs.rs @@ -0,0 +1,72 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::collections::BTreeMap; + +use mentat_core::{ + TypedValue, + ValueType, +}; + +use mentat_query::{ + Variable, +}; + +pub use errors::{ + Error, + ErrorKind, + Result, +}; + +/// Define the inputs to a query. This is in two parts: a set of values known now, and a set of +/// types known now. +/// The separate map of types is to allow queries to be algebrized without full knowledge of +/// the bindings that will be used at execution time. +/// When built correctly, `types` is guaranteed to contain the types of `values` -- use +/// `QueryInputs::new` or `QueryInputs::with_values` to construct an instance. +pub struct QueryInputs { + pub types: BTreeMap, + pub values: BTreeMap, +} + +impl Default for QueryInputs { + fn default() -> Self { + QueryInputs { types: BTreeMap::default(), values: BTreeMap::default() } + } +} + +impl QueryInputs { + pub fn with_value_sequence(vals: Vec<(Variable, TypedValue)>) -> QueryInputs { + let values: BTreeMap = vals.into_iter().collect(); + QueryInputs::with_values(values) + } + + pub fn with_values(values: BTreeMap) -> QueryInputs { + QueryInputs { + types: values.iter().map(|(var, val)| (var.clone(), val.value_type())).collect(), + values: values, + } + } + + pub fn new(mut types: BTreeMap, + values: BTreeMap) -> Result { + // Make sure that the types of the values agree with those in types, and collect. + for (var, v) in values.iter() { + let t = v.value_type(); + let old = types.insert(var.clone(), t); + if let Some(old) = old { + if old != t { + bail!(ErrorKind::InputTypeDisagreement(var.name(), old, t)); + } + } + } + Ok(QueryInputs { types: types, values: values }) + } +} From dfc846e4834b83b051bd8a3870a229010832265d Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 21:10:32 -0700 Subject: [PATCH 7/9] Part 3: define keep_intersected_keys. We'll use this to drop unneeded values from input maps, if lazy callers reuse a general-purpose map for multiple queries. --- query-algebrizer/src/clauses/mod.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index c8407583..e606a24e 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -89,6 +89,7 @@ trait Contains { trait Intersection { fn with_intersected_keys(&self, ks: &BTreeSet) -> Self; + fn keep_intersected_keys(&mut self, ks: &BTreeSet); } impl Contains for BTreeSet { @@ -108,6 +109,22 @@ impl Intersection for BTreeMap { .filter_map(|(k, v)| ks.when_contains(k, || (k.clone(), v.clone()))) .collect() } + + /// Remove all keys from the map that are not present in `ks`. + /// This implementation is terrible because there's no mutable iterator for BTreeMap. + fn keep_intersected_keys(&mut self, ks: &BTreeSet) { + let mut to_remove = Vec::with_capacity(self.len() - ks.len()); + { + for k in self.keys() { + if !ks.contains(k) { + to_remove.push(k.clone()) + } + } + } + for k in to_remove.into_iter() { + self.remove(&k); + } + } } /// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`. From 60c082b61ef626948e1a3404acc8a011ab8187ad Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 17 Apr 2017 13:14:30 -0700 Subject: [PATCH 8/9] Part 4: pass inputs through algebrizing and execution. (#418) This also adds a test that an `UnboundVariables` error is raised if a variable mentioned in the `:in` clause isn't bound. --- query-algebrizer/src/clauses/mod.rs | 57 +++++++++++++++++-------- query-algebrizer/src/clauses/pattern.rs | 14 ++++-- query-algebrizer/src/errors.rs | 7 +++ query-algebrizer/src/lib.rs | 26 ++++++++--- query/src/lib.rs | 4 +- src/conn.rs | 7 +-- src/errors.rs | 7 +++ src/lib.rs | 2 + src/query.rs | 38 +++++++++++------ tests/query.rs | 45 +++++++++++++++++++ 10 files changed, 159 insertions(+), 48 deletions(-) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index e606a24e..c7d0822f 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -59,6 +59,7 @@ use types::{ TableAlias, }; +mod inputs; mod or; mod pattern; mod predicate; @@ -66,6 +67,8 @@ mod resolve; use validate::validate_or_join; +pub use self::inputs::QueryInputs; + // We do this a lot for errors. trait RcCloned { fn cloned(&self) -> T; @@ -240,6 +243,38 @@ impl ConjoiningClauses { ..Default::default() } } + + pub fn with_inputs(in_variables: BTreeSet, inputs: T) -> ConjoiningClauses + where T: Into> { + ConjoiningClauses::with_inputs_and_alias_counter(in_variables, inputs, RcCounter::new()) + } + + pub fn with_inputs_and_alias_counter(in_variables: BTreeSet, + inputs: T, + alias_counter: RcCounter) -> ConjoiningClauses + where T: Into> { + match inputs.into() { + None => ConjoiningClauses::with_alias_counter(alias_counter), + Some(QueryInputs { mut types, mut values }) => { + // Discard any bindings not mentioned in our :in clause. + types.keep_intersected_keys(&in_variables); + values.keep_intersected_keys(&in_variables); + + let mut cc = ConjoiningClauses { + alias_counter: alias_counter, + input_variables: in_variables, + value_bindings: values, + ..Default::default() + }; + + // Pre-fill our type mappings with the types of the input bindings. + cc.known_types + .extend(types.iter() + .map(|(k, v)| (k.clone(), unit_type_set(*v)))); + cc + }, + } + } } /// Cloning. @@ -271,28 +306,16 @@ impl ConjoiningClauses { } } -impl ConjoiningClauses { - #[allow(dead_code)] - fn with_value_bindings(bindings: BTreeMap) -> ConjoiningClauses { - let mut cc = ConjoiningClauses { - value_bindings: bindings, - ..Default::default() - }; - - // Pre-fill our type mappings with the types of the input bindings. - cc.known_types - .extend(cc.value_bindings - .iter() - .map(|(k, v)| (k.clone(), unit_type_set(v.value_type())))); - cc - } -} - impl ConjoiningClauses { pub fn bound_value(&self, var: &Variable) -> Option { self.value_bindings.get(var).cloned() } + /// Return a set of the variables externally bound to values. + pub fn value_bound_variables(&self) -> BTreeSet { + self.value_bindings.keys().cloned().collect() + } + /// Return a single `ValueType` if the given variable is known to have a precise type. /// Returns `None` if the type of the variable is unknown. /// Returns `None` if the type of the variable is known but not precise -- "double diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index 99afdbf0..21e98138 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -270,6 +270,7 @@ mod testing { use super::*; use std::collections::BTreeMap; + use std::collections::BTreeSet; use std::rc::Rc; use mentat_core::attribute::Unique; @@ -288,6 +289,7 @@ mod testing { }; use clauses::{ + QueryInputs, add_attribute, associate_ident, ident, @@ -660,7 +662,9 @@ mod testing { let b: BTreeMap = vec![(y.clone(), TypedValue::Boolean(true))].into_iter().collect(); - let mut cc = ConjoiningClauses::with_value_bindings(b); + let inputs = QueryInputs::with_values(b); + let variables: BTreeSet = vec![Variable::from_valid_name("?y")].into_iter().collect(); + let mut cc = ConjoiningClauses::with_inputs(variables, inputs); cc.apply_pattern(&schema, Pattern { source: None, @@ -705,7 +709,9 @@ mod testing { let b: BTreeMap = vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); - let mut cc = ConjoiningClauses::with_value_bindings(b); + let inputs = QueryInputs::with_values(b); + let variables: BTreeSet = vec![Variable::from_valid_name("?y")].into_iter().collect(); + let mut cc = ConjoiningClauses::with_inputs(variables, inputs); cc.apply_pattern(&schema, Pattern { source: None, @@ -737,7 +743,9 @@ mod testing { let b: BTreeMap = vec![(y.clone(), TypedValue::Long(42))].into_iter().collect(); - let mut cc = ConjoiningClauses::with_value_bindings(b); + let inputs = QueryInputs::with_values(b); + let variables: BTreeSet = vec![Variable::from_valid_name("?y")].into_iter().collect(); + let mut cc = ConjoiningClauses::with_inputs(variables, inputs); cc.apply_pattern(&schema, Pattern { source: None, diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index b699039a..256ee970 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -10,6 +10,8 @@ extern crate mentat_query; +use mentat_core::ValueType; + use self::mentat_query::{ PlainSymbol, }; @@ -20,6 +22,11 @@ error_chain! { } errors { + InputTypeDisagreement(var: PlainSymbol, declared: ValueType, provided: ValueType) { + description("input type disagreement") + display("value of type {} provided for var {}, expected {}", provided, var, declared) + } + UnknownFunction(name: PlainSymbol) { description("no such function") display("no function named {}", name) diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 344ee1d2..29b8ad5f 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -15,6 +15,7 @@ extern crate mentat_core; extern crate mentat_query; use std::collections::BTreeSet; +use std::ops::Sub; mod errors; mod types; @@ -41,6 +42,10 @@ pub use errors::{ Result, }; +pub use clauses::{ + QueryInputs, +}; + #[allow(dead_code)] pub struct AlgebraicQuery { default_source: SrcVar, @@ -74,16 +79,20 @@ impl AlgebraicQuery { pub fn is_known_empty(&self) -> bool { self.cc.is_known_empty() } + + /// Return a set of the input variables mentioned in the `:in` clause that have not yet been + /// bound. We do this by looking at the CC. + pub fn unbound_variables(&self) -> BTreeSet { + self.cc.input_variables.sub(&self.cc.value_bound_variables()) + } } pub fn algebrize_with_counter(schema: &Schema, parsed: FindQuery, counter: usize) -> Result { - let alias_counter = RcCounter::with_initial(counter); - let cc = clauses::ConjoiningClauses::with_alias_counter(alias_counter); - algebrize_with_cc(schema, parsed, cc) + algebrize_with_inputs(schema, parsed, counter, QueryInputs::default()) } pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { - algebrize_with_cc(schema, parsed, clauses::ConjoiningClauses::default()) + algebrize_with_inputs(schema, parsed, 0, QueryInputs::default()) } /// Take an ordering list. Any variables that aren't fixed by the query are used to produce @@ -122,8 +131,13 @@ fn validate_and_simplify_order(cc: &ConjoiningClauses, order: Option> } } -#[allow(dead_code)] -pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningClauses) -> Result { +pub fn algebrize_with_inputs(schema: &Schema, + parsed: FindQuery, + counter: usize, + inputs: QueryInputs) -> Result { + let alias_counter = RcCounter::with_initial(counter); + let mut cc = ConjoiningClauses::with_inputs_and_alias_counter(parsed.in_vars, inputs, alias_counter); + // TODO: integrate default source into pattern processing. // TODO: flesh out the rest of find-into-context. let where_clauses = parsed.where_clauses; diff --git a/query/src/lib.rs b/query/src/lib.rs index 3a1c501a..3c267b07 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -34,7 +34,6 @@ extern crate edn; extern crate mentat_core; use std::collections::{ - BTreeMap, BTreeSet, }; @@ -46,7 +45,6 @@ pub use edn::{NamespacedKeyword, PlainSymbol}; use mentat_core::{ TypedValue, - ValueType, }; pub type SrcVarName = String; // Do not include the required syntactic '$'. @@ -743,4 +741,4 @@ impl ContainsVariables for Pattern { acc_ref(acc, v) } } -} +} \ No newline at end of file diff --git a/src/conn.rs b/src/conn.rs index 340e0210..88a26b70 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -10,7 +10,6 @@ #![allow(dead_code)] -use std::collections::HashMap; use std::sync::{Arc, Mutex}; use rusqlite; @@ -19,7 +18,6 @@ use edn; use mentat_core::{ Schema, - TypedValue, }; use mentat_db::db; @@ -29,13 +27,12 @@ use mentat_db::{ TxReport, }; -use mentat_query::Variable; - use mentat_tx_parser; use errors::*; use query::{ q_once, + QueryInputs, QueryResults, }; @@ -118,7 +115,7 @@ impl Conn { query: &str, inputs: T, limit: U) -> Result - where T: Into>>, + where T: Into>, U: Into> { diff --git a/src/errors.rs b/src/errors.rs index b623fd75..c9172ae0 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -12,6 +12,8 @@ use rusqlite; +use std::collections::BTreeSet; + use edn; use mentat_db; use mentat_query_algebrizer; @@ -40,6 +42,11 @@ error_chain! { } errors { + UnboundVariables(names: BTreeSet) { + description("unbound variables at execution time") + display("variables {:?} unbound at execution time", names) + } + InvalidArgumentName(name: String) { description("invalid argument name") display("invalid argument name: '{}'", name) diff --git a/src/lib.rs b/src/lib.rs index 89b60890..c9686a86 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,7 +54,9 @@ pub use mentat_db::{ pub use query::{ NamespacedKeyword, PlainSymbol, + QueryInputs, QueryResults, + Variable, q_once, }; diff --git a/src/query.rs b/src/query.rs index 89515499..5a4f8fb0 100644 --- a/src/query.rs +++ b/src/query.rs @@ -8,17 +8,20 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::collections::HashMap; - use rusqlite; use rusqlite::types::ToSql; use mentat_core::{ Schema, - TypedValue, }; -use mentat_query_algebrizer::algebrize; +use mentat_query_algebrizer::{ + algebrize_with_inputs, +}; + +pub use mentat_query_algebrizer::{ + QueryInputs, +}; pub use mentat_query::{ NamespacedKeyword, @@ -42,31 +45,31 @@ pub use mentat_query_projector::{ QueryResults, }; -use errors::Result; +use errors::{ + ErrorKind, + Result, +}; pub type QueryExecutionResult = Result; -/// Take an EDN query string, a reference to a open SQLite connection, a Mentat DB, and an optional -/// collection of input bindings (which should be keyed by `"?varname"`), and execute the query -/// immediately, blocking the current thread. +/// Take an EDN query string, a reference to an open SQLite connection, a Mentat schema, and an +/// optional collection of input bindings (which should be keyed by `"?varname"`), and execute the +/// query immediately, blocking the current thread. /// Returns a structure that corresponds to the kind of input query, populated with `TypedValue` /// instances. -/// The caller is responsible for ensuring that the SQLite connection is in a transaction if +/// The caller is responsible for ensuring that the SQLite connection has an open transaction if /// isolation is required. -#[allow(unused_variables)] pub fn q_once<'sqlite, 'schema, 'query, T, U> (sqlite: &'sqlite rusqlite::Connection, schema: &'schema Schema, query: &'query str, inputs: T, limit: U) -> QueryExecutionResult - where T: Into>>, + where T: Into>, U: Into> { - // TODO: validate inputs. - let parsed = parse_find_string(query)?; - let mut algebrized = algebrize(schema, parsed)?; + let mut algebrized = algebrize_with_inputs(schema, parsed, 0, inputs.into().unwrap_or(QueryInputs::default()))?; if algebrized.is_known_empty() { // We don't need to do any SQL work at all. @@ -75,6 +78,13 @@ pub fn q_once<'sqlite, 'schema, 'query, T, U> algebrized.apply_limit(limit.into()); + // Because this is q_once, we can check that all of our `:in` variables are bound at this point. + // If they aren't, the user has made an error -- perhaps writing the wrong variable in `:in`, or + // not binding in the `QueryInput`. + let unbound = algebrized.unbound_variables(); + if !unbound.is_empty() { + bail!(ErrorKind::UnboundVariables(unbound.into_iter().map(|v| v.to_string()).collect())); + } let select = query_to_select(algebrized); let SQLQuery { sql, args } = select.query.to_sql_query()?; diff --git a/tests/query.rs b/tests/query.rs index 8ad3b582..65c6880d 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -21,11 +21,18 @@ use mentat_core::{ use mentat::{ NamespacedKeyword, + QueryInputs, QueryResults, + Variable, new_connection, q_once, }; +use mentat::errors::{ + Error, + ErrorKind, +}; + #[test] fn test_rel() { let mut c = new_connection("").expect("Couldn't open conn."); @@ -159,3 +166,41 @@ fn test_coll() { println!("Coll took {}µs", start.to(end).num_microseconds().unwrap()); } +#[test] +fn test_inputs() { + let mut c = new_connection("").expect("Couldn't open conn."); + let db = mentat_db::db::ensure_current_version(&mut c).expect("Couldn't open DB."); + + // entids::DB_INSTALL_VALUE_TYPE = 5. + let ee = (Variable::from_valid_name("?e"), TypedValue::Ref(5)); + let inputs = QueryInputs::with_value_sequence(vec![ee]); + let results = q_once(&c, &db.schema, + "[:find ?i . :in ?e :where [?e :db/ident ?i]]", inputs, None) + .expect("query to succeed"); + + if let QueryResults::Scalar(Some(TypedValue::Keyword(value))) = results { + assert_eq!(value.as_ref(), &NamespacedKeyword::new("db.install", "valueType")); + } else { + panic!("Expected scalar."); + } +} + +/// Ensure that a query won't be run without all of its `:in` variables being bound. +#[test] +fn test_unbound_inputs() { + let mut c = new_connection("").expect("Couldn't open conn."); + let db = mentat_db::db::ensure_current_version(&mut c).expect("Couldn't open DB."); + + // Bind the wrong var by 'mistake'. + let xx = (Variable::from_valid_name("?x"), TypedValue::Ref(5)); + let inputs = QueryInputs::with_value_sequence(vec![xx]); + let results = q_once(&c, &db.schema, + "[:find ?i . :in ?e :where [?e :db/ident ?i]]", inputs, None); + + match results { + Result::Err(Error(ErrorKind::UnboundVariables(vars), _)) => { + assert_eq!(vars, vec!["?e".to_string()].into_iter().collect()); + }, + _ => panic!("Expected unbound variables."), + } +} From ff0147e89cc6cd1cce912c74a245c6da3a941cd7 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 18 Apr 2017 13:01:26 -0700 Subject: [PATCH 9/9] Review comments: downgrade to error-chain 0.8.1 for Send + Sync bound; use combine::primitive::Error. --- Cargo.toml | 2 +- db/Cargo.toml | 2 +- query-algebrizer/Cargo.toml | 2 +- query-parser/Cargo.toml | 2 +- query-parser/src/parse.rs | 35 +++++++++++++++++++++++++++++++---- query-projector/Cargo.toml | 2 +- sql/Cargo.toml | 2 +- tx-parser/Cargo.toml | 2 +- 8 files changed, 38 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index faf9702d..8849b8c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ rustc_version = "0.1.7" [dependencies] clap = "2.19.3" -error-chain = "0.9.0" +error-chain = "0.8.1" nickel = "0.9.0" slog = "1.4.0" slog-scope = "0.2.2" diff --git a/db/Cargo.toml b/db/Cargo.toml index a6b4e26d..b0deae04 100644 --- a/db/Cargo.toml +++ b/db/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] -error-chain = "0.9.0" +error-chain = "0.8.1" itertools = "0.5.9" lazy_static = "0.2.2" ordered-float = "0.4.0" diff --git a/query-algebrizer/Cargo.toml b/query-algebrizer/Cargo.toml index 71c15f50..c21bc49b 100644 --- a/query-algebrizer/Cargo.toml +++ b/query-algebrizer/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] -error-chain = "0.9.0" +error-chain = "0.8.1" [dependencies.mentat_core] path = "../core" diff --git a/query-parser/Cargo.toml b/query-parser/Cargo.toml index 7668db30..7dde1314 100644 --- a/query-parser/Cargo.toml +++ b/query-parser/Cargo.toml @@ -5,7 +5,7 @@ workspace = ".." [dependencies] combine = "2.2.2" -error-chain = "0.9.0" +error-chain = "0.8.1" matches = "0.1" [dependencies.edn] diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 2952afdc..3017b797 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -353,15 +353,15 @@ enum FindQueryPart { } def_parser!(Find, vars, BTreeSet, { - vector().of_exactly(many(Query::variable()).map(|vars: Vec| { + vector().of_exactly(many(Query::variable()).and_then(|vars: Vec| { let given = vars.len(); let set: BTreeSet = vars.into_iter().collect(); if given != set.len() { // TODO: find out what the variable is! - // TODO: figure out how to use `and_then` to return an error here. - panic!(Error::from_kind(ErrorKind::DuplicateVariableError)); + let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError)); + Err(combine::primitives::Error::Other(e)) } else { - set + Ok(set) } })) }); @@ -549,6 +549,33 @@ mod test { vec![variable(e.clone())]); } + #[test] + fn test_repeated_vars() { + let e = edn::PlainSymbol::new("?e"); + let f = edn::PlainSymbol::new("?f"); + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), + edn::Value::PlainSymbol(f.clone()),]); + assert_parses_to!(Find::vars, input, + vec![variable(e.clone()), variable(f.clone())].into_iter().collect()); + + let g = edn::PlainSymbol::new("?g"); + let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(g.clone()), + edn::Value::PlainSymbol(g.clone()),]); + + let mut par = Find::vars(); + let result = par.parse(input.with_spans().into_atom_stream()) + .map(|x| x.0) + .map_err(|e| if let Some(combine::primitives::Error::Other(x)) = e.errors.into_iter().next() { + // Pattern matching on boxes is rocket science until Rust Nightly features hit + // stable. ErrorKind isn't Clone, so convert to strings. We could pattern match + // for exact comparison here. + x.downcast::().ok().map(|e| e.to_string()) + } else { + None + }); + assert_eq!(result, Err(Some("duplicates in variable list".to_string()))); + } + #[test] fn test_or() { let oj = edn::PlainSymbol::new("or"); diff --git a/query-projector/Cargo.toml b/query-projector/Cargo.toml index 9ef12ac0..0bd54f1f 100644 --- a/query-projector/Cargo.toml +++ b/query-projector/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] -error-chain = "0.9.0" +error-chain = "0.8.1" [dependencies.rusqlite] version = "0.10.1" diff --git a/sql/Cargo.toml b/sql/Cargo.toml index d6e57d77..868d07e4 100644 --- a/sql/Cargo.toml +++ b/sql/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" workspace = ".." [dependencies] -error-chain = "0.9.0" +error-chain = "0.8.1" ordered-float = "0.4.0" [dependencies.mentat_core] diff --git a/tx-parser/Cargo.toml b/tx-parser/Cargo.toml index cb0c051d..1fc8839c 100644 --- a/tx-parser/Cargo.toml +++ b/tx-parser/Cargo.toml @@ -5,7 +5,7 @@ workspace = ".." [dependencies] combine = "2.2.2" -error-chain = "0.9.0" +error-chain = "0.8.1" [dependencies.edn] path = "../edn"