diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 6503d0ba..618c6aa7 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -21,7 +21,7 @@ "label": "Run CLI", "command": "cargo", "args": [ - "cli", + "debugcli", ], "problemMatcher": [ "$rustc" diff --git a/core/src/lib.rs b/core/src/lib.rs index b36c85d0..47ac4e15 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -293,6 +293,12 @@ impl From for TypedValue { } } +impl<'a> From<&'a str> for TypedValue { + fn from(value: &'a str) -> TypedValue { + TypedValue::String(Rc::new(value.to_string())) + } +} + impl From for TypedValue { fn from(value: String) -> TypedValue { TypedValue::String(Rc::new(value)) @@ -449,6 +455,15 @@ impl ValueTypeSet { ValueTypeSet(EnumSet::of_both(ValueType::Double, ValueType::Long)) } + /// Return a set containing `Double`, `Long`, and `Instant`. + pub fn of_numeric_and_instant_types() -> ValueTypeSet { + let mut s = EnumSet::new(); + s.insert(ValueType::Double); + s.insert(ValueType::Long); + s.insert(ValueType::Instant); + ValueTypeSet(s) + } + /// Return a set containing `Ref` and `Keyword`. pub fn of_keywords() -> ValueTypeSet { ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Keyword)) @@ -516,6 +531,18 @@ impl ValueTypeSet { } } +impl From for ValueTypeSet { + fn from(t: ValueType) -> Self { + ValueTypeSet::of_one(t) + } +} + +impl ValueTypeSet { + pub fn is_only_numeric(&self) -> bool { + self.is_subset(&ValueTypeSet::of_numeric_types()) + } +} + impl IntoIterator for ValueTypeSet { type Item = ValueType; type IntoIter = ::enum_set::Iter; @@ -541,10 +568,16 @@ impl ::std::iter::Extend for ValueTypeSet { } } +/// We have an enum of types, `ValueType`. It can be collected into a set, `ValueTypeSet`. Each type +/// is associated with a type tag, which is how a type is represented in, e.g., SQL storage. Types +/// can share type tags, because backing SQL storage is able to differentiate between some types +/// (e.g., longs and doubles), and so distinct tags aren't necessary. That association is defined by +/// `SQLValueType`. That trait similarly extends to `ValueTypeSet`, which maps a collection of types +/// into a collection of tags. pub trait SQLValueTypeSet { fn value_type_tags(&self) -> BTreeSet; - fn has_unique_type_code(&self) -> bool; - fn unique_type_code(&self) -> Option; + fn has_unique_type_tag(&self) -> bool; + fn unique_type_tag(&self) -> Option; } impl SQLValueTypeSet for ValueTypeSet { @@ -557,15 +590,15 @@ impl SQLValueTypeSet for ValueTypeSet { out } - fn unique_type_code(&self) -> Option { - if self.is_unit() || self.has_unique_type_code() { + fn unique_type_tag(&self) -> Option { + if self.is_unit() || self.has_unique_type_tag() { self.exemplar().map(|t| t.value_type_tag()) } else { None } } - fn has_unique_type_code(&self) -> bool { + fn has_unique_type_tag(&self) -> bool { if self.is_unit() { return true; } diff --git a/query-algebrizer/src/clauses/fulltext.rs b/query-algebrizer/src/clauses/fulltext.rs index 642d1a71..f01054cc 100644 --- a/query-algebrizer/src/clauses/fulltext.rs +++ b/query-algebrizer/src/clauses/fulltext.rs @@ -94,7 +94,7 @@ impl ConjoiningClauses { // TODO: process source variables. match args.next().unwrap() { FnArg::SrcVar(SrcVar::DefaultSrc) => {}, - _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)), + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable", 0)), } let schema = known.schema; @@ -127,8 +127,11 @@ impl ConjoiningClauses { // An unknown ident, or an entity that isn't present in the store, or isn't a fulltext // attribute, is likely enough to be a coding error that we choose to bail instead of // marking the pattern as known-empty. - let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; - let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute", 1))?; + let attribute = schema.attribute_for_entid(a) + .cloned() + .ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), + "attribute", 1))?; if !attribute.fulltext { // We can never get results from a non-fulltext attribute! @@ -166,12 +169,12 @@ impl ConjoiningClauses { FnArg::Variable(in_var) => { match self.bound_value(&in_var) { Some(t @ TypedValue::String(_)) => Either::Left(t), - Some(_) => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)), + Some(_) => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string", 2)), None => { // Regardless of whether we'll be providing a string later, or the value // comes from a column, it must be a string. if self.known_type(&in_var) != Some(ValueType::String) { - bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)); + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string", 2)); } if self.input_variables.contains(&in_var) { @@ -192,7 +195,7 @@ impl ConjoiningClauses { }, } }, - _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)), + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string", 2)), }; let qv = match search { diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index f43aa5fb..a3a00589 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -286,6 +286,18 @@ impl Default for ConjoiningClauses { } } +pub struct VariableIterator<'a>( + ::std::collections::btree_map::Keys<'a, Variable, TypedValue>, +); + +impl<'a> Iterator for VariableIterator<'a> { + type Item = &'a Variable; + + fn next(&mut self) -> Option<&'a Variable> { + self.0.next() + } +} + impl ConjoiningClauses { /// Construct a new `ConjoiningClauses` with the provided alias counter. This allows a caller /// to share a counter with an enclosing scope, and to start counting at a particular offset @@ -390,7 +402,7 @@ impl ConjoiningClauses { self.value_bindings.get(var).cloned() } - pub(crate) fn is_value_bound(&self, var: &Variable) -> bool { + pub fn is_value_bound(&self, var: &Variable) -> bool { self.value_bindings.contains_key(var) } @@ -398,9 +410,14 @@ impl ConjoiningClauses { self.value_bindings.with_intersected_keys(variables) } + /// Return an iterator over the variables externally bound to values. + pub fn value_bound_variables(&self) -> VariableIterator { + VariableIterator(self.value_bindings.keys()) + } + /// Return a set of the variables externally bound to values. - pub(crate) fn value_bound_variable_set(&self) -> BTreeSet { - self.value_bindings.keys().cloned().collect() + pub fn value_bound_variable_set(&self) -> BTreeSet { + self.value_bound_variables().cloned().collect() } /// Return a single `ValueType` if the given variable is known to have a precise type. @@ -414,7 +431,7 @@ impl ConjoiningClauses { } } - pub(crate) fn known_type_set(&self, var: &Variable) -> ValueTypeSet { + pub fn known_type_set(&self, var: &Variable) -> ValueTypeSet { self.known_types.get(var).cloned().unwrap_or(ValueTypeSet::any()) } diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index f2b6a509..a4dc601c 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -92,13 +92,13 @@ impl ConjoiningClauses { let mut left_types = self.potential_types(known.schema, &left)? .intersection(&supported_types); if left_types.is_empty() { - bail!(ErrorKind::InvalidArgument(predicate.operator.clone(), "numeric or instant", 0)); + bail!(ErrorKind::InvalidArgumentType(predicate.operator.clone(), supported_types, 0)); } let mut right_types = self.potential_types(known.schema, &right)? .intersection(&supported_types); if right_types.is_empty() { - bail!(ErrorKind::InvalidArgument(predicate.operator.clone(), "numeric or instant", 1)); + bail!(ErrorKind::InvalidArgumentType(predicate.operator.clone(), supported_types, 1)); } // We would like to allow longs to compare to doubles. @@ -134,14 +134,18 @@ impl ConjoiningClauses { // We expect the intersection to be Long, Long+Double, Double, or Instant. let left_v; let right_v; + if shared_types == ValueTypeSet::of_one(ValueType::Instant) { left_v = self.resolve_instant_argument(&predicate.operator, 0, left)?; right_v = self.resolve_instant_argument(&predicate.operator, 1, right)?; - } else if !shared_types.is_empty() && shared_types.is_subset(&ValueTypeSet::of_numeric_types()) { + } else if shared_types.is_only_numeric() { left_v = self.resolve_numeric_argument(&predicate.operator, 0, left)?; right_v = self.resolve_numeric_argument(&predicate.operator, 1, right)?; + } else if shared_types == ValueTypeSet::of_one(ValueType::Ref) { + left_v = self.resolve_ref_argument(known.schema, &predicate.operator, 0, left)?; + right_v = self.resolve_ref_argument(known.schema, &predicate.operator, 1, right)?; } else { - bail!(ErrorKind::InvalidArgument(predicate.operator.clone(), "numeric or instant", 0)); + bail!(ErrorKind::InvalidArgumentType(predicate.operator.clone(), supported_types, 0)); } // These arguments must be variables or instant/numeric constants. diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs index 9b86a10c..c3169580 100644 --- a/query-algebrizer/src/clauses/resolve.rs +++ b/query-algebrizer/src/clauses/resolve.rs @@ -9,6 +9,8 @@ // specific language governing permissions and limitations under the License. use mentat_core::{ + HasSchema, + Schema, TypedValue, ValueType, }; @@ -92,11 +94,49 @@ impl ConjoiningClauses { Constant(NonIntegerConstant::BigInteger(_)) | Vector(_) => { self.mark_known_empty(EmptyBecause::NonInstantArgument); - bail!(ErrorKind::InvalidArgument(function.clone(), "instant", position)); + bail!(ErrorKind::InvalidArgumentType(function.clone(), ValueType::Instant.into(), position)); }, } } + /// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete + /// constraint. + pub(crate) fn resolve_ref_argument(&mut self, schema: &Schema, function: &PlainSymbol, position: usize, arg: FnArg) -> Result { + use self::FnArg::*; + match arg { + FnArg::Variable(var) => { + self.constrain_var_to_type(var.clone(), ValueType::Ref); + if let Some(TypedValue::Ref(e)) = self.bound_value(&var) { + // Incorrect types will be handled by the constraint, above. + Ok(QueryValue::Entid(e)) + } else { + self.column_bindings + .get(&var) + .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) + .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name()))) + } + }, + EntidOrInteger(i) => Ok(QueryValue::TypedValue(TypedValue::Ref(i))), + IdentOrKeyword(i) => { + schema.get_entid(&i) + .map(|known_entid| QueryValue::Entid(known_entid.into())) + .ok_or_else(|| Error::from_kind(ErrorKind::UnrecognizedIdent(i.to_string()))) + }, + Constant(NonIntegerConstant::Boolean(_)) | + Constant(NonIntegerConstant::Float(_)) | + Constant(NonIntegerConstant::Text(_)) | + Constant(NonIntegerConstant::Uuid(_)) | + Constant(NonIntegerConstant::Instant(_)) | + Constant(NonIntegerConstant::BigInteger(_)) | + SrcVar(_) | + Vector(_) => { + self.mark_known_empty(EmptyBecause::NonEntityArgument); + bail!(ErrorKind::InvalidArgumentType(function.clone(), ValueType::Ref.into(), position)); + }, + + } + } + /// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete /// constraint. #[allow(dead_code)] diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index fb67c0de..306a1dbf 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -12,6 +12,7 @@ extern crate mentat_query; use mentat_core::{ ValueType, + ValueTypeSet, }; use self::mentat_query::{ @@ -49,6 +50,11 @@ error_chain! { display("value of type {} provided for var {}, expected {}", provided, var, declared) } + UnrecognizedIdent(ident: String) { + description("no entid found for ident") + display("no entid found for ident: {}", ident) + } + UnknownFunction(name: PlainSymbol) { description("no such function") display("no function named {}", name) @@ -80,9 +86,14 @@ error_chain! { display("invalid expression in ground constant") } - InvalidArgument(function: PlainSymbol, expected_type: &'static str, position: usize) { + InvalidArgument(function: PlainSymbol, expected: &'static str, position: usize) { description("invalid argument") - display("invalid argument to {}: expected {} in position {}.", function, expected_type, position) + display("invalid argument to {}: expected {} in position {}.", function, expected, position) + } + + InvalidArgumentType(function: PlainSymbol, expected_types: ValueTypeSet, position: usize) { + description("invalid argument") + display("invalid argument to {}: expected one of {:?} in position {}.", function, expected_types, position) } InvalidLimit(val: String, kind: ValueType) { diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 32781d92..e4e2b6e0 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -8,6 +8,8 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +#![recursion_limit="128"] + #[macro_use] extern crate error_chain; @@ -130,7 +132,19 @@ pub struct AlgebraicQuery { default_source: SrcVar, pub find_spec: Rc, has_aggregates: bool, + + /// The set of variables that the caller wishes to be used for grouping when aggregating. + /// These are specified in the query input, as `:with`, and are then chewed up during projection. + /// If no variables are supplied, then no additional grouping is necessary beyond the + /// non-aggregated projection list. pub with: BTreeSet, + + /// Some query features, such as ordering, are implemented by implicit reference to SQL columns. + /// In order for these references to be 'live', those columns must be projected. + /// This is the set of variables that must be so projected. + /// This is not necessarily every variable that will be so required -- some variables + /// will already be in the projection list. + pub named_projection: BTreeSet, pub order: Option>, pub limit: Limit, pub cc: clauses::ConjoiningClauses, @@ -147,7 +161,12 @@ impl AlgebraicQuery { self.find_spec .columns() .all(|e| match e { - &Element::Variable(ref var) => self.cc.is_value_bound(var), + &Element::Variable(ref var) | + &Element::Corresponding(ref var) => self.cc.is_value_bound(var), + + // For now, we pretend that aggregate functions are never fully bound: + // we don't statically compute them, even if we know the value of the var. + &Element::Aggregate(ref _fn) => false, }) } @@ -270,7 +289,6 @@ pub fn algebrize_with_inputs(known: Known, cc.process_required_types()?; let (order, extra_vars) = validate_and_simplify_order(&cc, parsed.order)?; - let with: BTreeSet = parsed.with.into_iter().chain(extra_vars.into_iter()).collect(); // This might leave us with an unused `:in` variable. let limit = if parsed.find_spec.is_unit_limited() { Limit::Fixed(1) } else { parsed.limit }; @@ -278,7 +296,8 @@ pub fn algebrize_with_inputs(known: Known, default_source: parsed.default_source, find_spec: Rc::new(parsed.find_spec), has_aggregates: false, // TODO: we don't parse them yet. - with: with, + with: parsed.with, + named_projection: extra_vars, order: order, limit: limit, cc: cc, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 1755d568..881a95e8 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -283,6 +283,10 @@ pub enum Inequality { GreaterThan, GreaterThanOrEquals, NotEquals, + + // Ref operators. + Unpermute, + Differ, } impl Inequality { @@ -294,6 +298,9 @@ impl Inequality { GreaterThan => ">", GreaterThanOrEquals => ">=", NotEquals => "<>", + + Unpermute => "<", + Differ => "<>", } } @@ -304,15 +311,31 @@ impl Inequality { ">" => Some(Inequality::GreaterThan), ">=" => Some(Inequality::GreaterThanOrEquals), "!=" => Some(Inequality::NotEquals), - _ => None, + + "unpermute" => Some(Inequality::Unpermute), + "differ" => Some(Inequality::Differ), + _ => None, } } // The built-in inequality operators apply to Long, Double, and Instant. pub fn supported_types(&self) -> ValueTypeSet { - let mut ts = ValueTypeSet::of_numeric_types(); - ts.insert(ValueType::Instant); - ts + use self::Inequality::*; + match self { + &LessThan | + &LessThanOrEquals | + &GreaterThan | + &GreaterThanOrEquals | + &NotEquals => { + let mut ts = ValueTypeSet::of_numeric_types(); + ts.insert(ValueType::Instant); + ts + }, + &Unpermute | + &Differ => { + ValueTypeSet::of_one(ValueType::Ref) + }, + } } } @@ -325,6 +348,9 @@ impl Debug for Inequality { &GreaterThan => ">", &GreaterThanOrEquals => ">=", &NotEquals => "!=", // Datalog uses !=. SQL uses <>. + + &Unpermute => "<", + &Differ => "<>", }) } } @@ -505,6 +531,7 @@ pub enum EmptyBecause { NonAttributeArgument, NonInstantArgument, NonNumericArgument, + NonEntityArgument, NonStringFulltextValue, NonFulltextAttribute(Entid), UnresolvedIdent(NamespacedKeyword), @@ -546,6 +573,9 @@ impl Debug for EmptyBecause { &NonInstantArgument => { write!(f, "Non-instant argument in instant place") }, + &NonEntityArgument => { + write!(f, "Non-entity argument in entity place") + }, &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, diff --git a/query-algebrizer/tests/predicate.rs b/query-algebrizer/tests/predicate.rs index d03c8ecf..53b0dd6d 100644 --- a/query-algebrizer/tests/predicate.rs +++ b/query-algebrizer/tests/predicate.rs @@ -69,9 +69,9 @@ fn test_instant_predicates_require_instants() { [?e :foo/date ?t] [(> ?t "2017-06-16T00:56:41.257Z")]]"#; match bails(known, query).0 { - ErrorKind::InvalidArgument(op, why, idx) => { + ErrorKind::InvalidArgumentType(op, why, idx) => { assert_eq!(op, PlainSymbol::new(">")); - assert_eq!(why, "numeric or instant"); + assert_eq!(why, ValueTypeSet::of_numeric_and_instant_types()); assert_eq!(idx, 1); }, _ => panic!("Expected InvalidArgument."), @@ -82,9 +82,9 @@ fn test_instant_predicates_require_instants() { [?e :foo/date ?t] [(> "2017-06-16T00:56:41.257Z", ?t)]]"#; match bails(known, query).0 { - ErrorKind::InvalidArgument(op, why, idx) => { + ErrorKind::InvalidArgumentType(op, why, idx) => { assert_eq!(op, PlainSymbol::new(">")); - assert_eq!(why, "numeric or instant"); + assert_eq!(why, ValueTypeSet::of_numeric_and_instant_types()); assert_eq!(idx, 0); // We get this right. }, _ => panic!("Expected InvalidArgument."), diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 0a4ad37d..0d8e17f0 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -41,6 +41,7 @@ use self::mentat_parser_utils::value_and_span::{ }; use self::mentat_query::{ + Aggregate, Binding, Direction, Element, @@ -170,6 +171,8 @@ def_parser!(Query, order, Order, { .or(Query::variable().map(|v| Order(Direction::Ascending, v))) }); +def_matches_plain_symbol!(Query, the, "the"); + pub struct Where<'a>(std::marker::PhantomData<&'a ()>); def_parser!(Where, pattern_value_place, PatternValuePlace, { @@ -274,6 +277,13 @@ def_parser!(Query, func, (QueryFunction, Vec), { (Query::query_function(), Query::arguments()) }); +def_parser!(Query, aggregate, Aggregate, { + seq().of_exactly(Query::func()) + .map(|(func, args)| Aggregate { + func, args, + }) +}); + /// A vector containing just a parenthesized filter expression. def_parser!(Where, pred, WhereClause, { // Accept either a nested list or a nested vector here: @@ -417,10 +427,25 @@ def_matches_plain_symbol!(Find, ellipsis, "..."); def_matches_plain_symbol!(Find, placeholder, "_"); -def_parser!(Find, elem, Element, { +def_parser!(Find, variable_element, Element, { Query::variable().map(Element::Variable) }); +def_parser!(Find, corresponding_element, Element, { + seq().of_exactly(Query::the().with(Query::variable())) + .map(Element::Corresponding) +}); + +def_parser!(Find, aggregate_element, Element, { + Query::aggregate().map(Element::Aggregate) +}); + +def_parser!(Find, elem, Element, { + choice([try(Find::variable_element()), + try(Find::corresponding_element()), + try(Find::aggregate_element())]) +}); + def_parser!(Find, find_scalar, FindSpec, { Find::elem().skip(Find::period()) .map(FindSpec::FindScalar) @@ -955,6 +980,45 @@ mod test { ])); } + #[test] + fn test_the() { + assert_edn_parses_to!(Find::corresponding_element, + "(the ?y)", + Element::Corresponding(Variable::from_valid_name("?y"))); + assert_edn_parses_to!(Find::find_tuple, + "[(the ?x) ?y]", + FindSpec::FindTuple(vec![Element::Corresponding(Variable::from_valid_name("?x")), + Element::Variable(Variable::from_valid_name("?y"))])); + assert_edn_parses_to!(Find::spec, + "[(the ?x) ?y]", + FindSpec::FindTuple(vec![Element::Corresponding(Variable::from_valid_name("?x")), + Element::Variable(Variable::from_valid_name("?y"))])); + let expected_query = + FindQuery { + find_spec: FindSpec::FindTuple(vec![Element::Corresponding(Variable::from_valid_name("?x")), + Element::Variable(Variable::from_valid_name("?y"))]), + where_clauses: vec![ + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::Variable(Variable::from_valid_name("?y")), + tx: PatternNonValuePlace::Placeholder, + })], + + default_source: SrcVar::DefaultSrc, + with: Default::default(), + in_vars: Default::default(), + in_sources: Default::default(), + limit: Limit::None, + order: None, + }; + assert_edn_parses_to!(Find::query, + "[:find [(the ?x) ?y] + :where [?x _ ?y]]", + expected_query); + } + #[test] fn test_where_fn() { assert_edn_parses_to!(Where::where_fn, diff --git a/query-projector/Cargo.toml b/query-projector/Cargo.toml index 92aa6e60..799c4c51 100644 --- a/query-projector/Cargo.toml +++ b/query-projector/Cargo.toml @@ -5,6 +5,7 @@ workspace = ".." [dependencies] error-chain = { git = "https://github.com/rnewman/error-chain", branch = "rnewman/sync" } +indexmap = "0.4" [dependencies.rusqlite] version = "0.13" diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index f44591ad..9eb32622 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -10,6 +10,7 @@ #[macro_use] extern crate error_chain; +extern crate indexmap; extern crate rusqlite; extern crate mentat_core; @@ -24,8 +25,13 @@ use std::collections::{ }; use std::iter; + use std::rc::Rc; +use indexmap::{ + IndexSet, +}; + use rusqlite::{ Row, Rows, @@ -33,8 +39,10 @@ use rusqlite::{ use mentat_core::{ SQLValueType, + SQLValueTypeSet, TypedValue, ValueType, + ValueTypeSet, ValueTypeTag, }; @@ -47,9 +55,12 @@ use mentat_db::{ }; use mentat_query::{ + Aggregate, Element, FindSpec, Limit, + PlainSymbol, + QueryFunction, Variable, }; @@ -57,12 +68,15 @@ use mentat_query_algebrizer::{ AlgebraicQuery, ColumnName, ConjoiningClauses, + QualifiedAlias, VariableBindings, VariableColumn, }; use mentat_query_sql::{ ColumnOrExpression, + Expression, + GroupBy, Name, Projection, ProjectedColumn, @@ -73,6 +87,39 @@ error_chain! { Error, ErrorKind, ResultExt, Result; } + errors { + /// We're just not done yet. Message that the feature is recognized but not yet + /// implemented. + NotYetImplemented(t: String) { + description("not yet implemented") + display("not yet implemented: {}", t) + } + CannotProjectImpossibleBinding(op: SimpleAggregationOp) { + description("no possible types for variable in projection list") + display("no possible types for value provided to {:?}", op) + } + CannotApplyAggregateOperationToTypes(op: SimpleAggregationOp, types: ValueTypeSet) { + description("cannot apply projection operation to types") + display("cannot apply projection operation {:?} to types {:?}", op, types) + } + UnboundVariable(var: PlainSymbol) { + description("cannot project unbound variable") + display("cannot project unbound variable {:?}", var) + } + NoTypeAvailableForVariable(var: PlainSymbol) { + description("cannot find type for variable") + display("cannot find type for variable {:?}", var) + } + UnexpectedResultsType(actual: &'static str, expected: &'static str) { + description("unexpected query results type") + display("expected {}, got {}", expected, actual) + } + AmbiguousAggregates(min_max_count: usize, corresponding_count: usize) { + description("ambiguous aggregates") + display("min/max expressions: {} (max 1), corresponding: {}", min_max_count, corresponding_count) + } + } + foreign_links { Rusqlite(rusqlite::Error); } @@ -80,13 +127,6 @@ error_chain! { links { DbError(mentat_db::Error, mentat_db::ErrorKind); } - - errors { - UnexpectedResultsType(actual: &'static str, expected: &'static str) { - description("unexpected query results type") - display("expected {}, got {}", expected, actual) - } - } } #[derive(Debug, PartialEq, Eq)] @@ -146,23 +186,54 @@ impl QueryOutput { pub fn from_constants(spec: &Rc, bindings: VariableBindings) -> QueryResults { use self::FindSpec::*; match &**spec { - &FindScalar(Element::Variable(ref var)) => { + &FindScalar(Element::Variable(ref var)) | + &FindScalar(Element::Corresponding(ref var)) => { let val = bindings.get(var).cloned(); QueryResults::Scalar(val) }, + &FindScalar(Element::Aggregate(ref _agg)) => { + // TODO + unimplemented!(); + }, &FindTuple(ref elements) => { - let values = elements.iter().map(|e| match e { - &Element::Variable(ref var) => bindings.get(var).cloned().expect("every var to have a binding"), - }).collect(); + let values = elements.iter() + .map(|e| match e { + &Element::Variable(ref var) | + &Element::Corresponding(ref var) => { + bindings.get(var).cloned().expect("every var to have a binding") + }, + &Element::Aggregate(ref _agg) => { + // TODO: static computation of aggregates, then + // implement the condition in `is_fully_bound`. + unreachable!(); + }, + }) + .collect(); QueryResults::Tuple(Some(values)) }, - &FindColl(Element::Variable(ref var)) => { + &FindColl(Element::Variable(ref var)) | + &FindColl(Element::Corresponding(ref var)) => { let val = bindings.get(var).cloned().expect("every var to have a binding"); QueryResults::Coll(vec![val]) }, + &FindColl(Element::Aggregate(ref _agg)) => { + // Does it even make sense to write + // [:find [(max ?x) ...] :where [_ :foo/bar ?x]] + // ? + // TODO + unimplemented!(); + }, &FindRel(ref elements) => { let values = elements.iter().map(|e| match e { - &Element::Variable(ref var) => bindings.get(var).cloned().expect("every var to have a binding"), + &Element::Variable(ref var) | + &Element::Corresponding(ref var) => { + bindings.get(var).cloned().expect("every var to have a binding") + }, + &Element::Aggregate(ref _agg) => { + // TODO: static computation of aggregates, then + // implement the condition in `is_fully_bound`. + unreachable!(); + }, }).collect(); QueryResults::Rel(vec![values]) }, @@ -254,16 +325,16 @@ impl TypedIndex { /// Look up this index and type(index) pair in the provided row. /// This function will panic if: /// - /// - This is an `Unknown` and the retrieved type code isn't an i32. + /// - This is an `Unknown` and the retrieved type tag isn't an i32. /// - If the retrieved value can't be coerced to a rusqlite `Value`. /// - Either index is out of bounds. /// - /// Because we construct our SQL projection list, the code that stored the data, and this + /// Because we construct our SQL projection list, the tag that stored the data, and this /// consumer, a panic here implies that we have a bad bug — we put data of a very wrong type in /// a row, and thus can't coerce to Value, we're retrieving from the wrong place, or our /// generated SQL is junk. /// - /// This function will return a runtime error if the type code is unknown, or the value is + /// This function will return a runtime error if the type tag is unknown, or the value is /// otherwise not convertible by the DB layer. fn lookup<'a, 'stmt>(&self, row: &Row<'a, 'stmt>) -> Result { use TypedIndex::*; @@ -282,17 +353,22 @@ impl TypedIndex { } } -fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) { +fn cc_column(cc: &ConjoiningClauses, var: &Variable) -> Result { + cc.column_bindings + .get(var) + .and_then(|cols| cols.get(0).cloned()) + .ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into()) +} + +fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> { // Every variable should be bound by the top-level CC to at least // one column in the query. If that constraint is violated it's a // bug in our code, so it's appropriate to panic here. - let columns = cc.column_bindings - .get(var) - .expect(format!("Every variable should have a binding, but {:?} does not", var).as_str()); - - let qa = columns[0].clone(); - let name = VariableColumn::Variable(var.clone()).column_name(); - (ColumnOrExpression::Column(qa), name) + cc_column(cc, var) + .map(|qa| { + let name = VariableColumn::Variable(var.clone()).column_name(); + (ColumnOrExpression::Column(qa), name) + }) } fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) { @@ -304,24 +380,216 @@ fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExp } /// Return the projected column -- that is, a value or SQL column and an associated name -- for a -/// given variable. Also return the type, if known. +/// given variable. Also return the type. /// Callers are expected to determine whether to project a type tag as an additional SQL column. -pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (ProjectedColumn, Option) { +pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueTypeSet)> { if let Some(value) = cc.bound_value(&var) { // If we already know the value, then our lives are easy. let tag = value.value_type(); let name = VariableColumn::Variable(var.clone()).column_name(); - (ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), Some(tag)) + Ok((ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), ValueTypeSet::of_one(tag))) } else { // If we don't, then the CC *must* have bound the variable. - let (column, name) = candidate_column(cc, var); - (ProjectedColumn(column, name), cc.known_type(var)) + let (column, name) = candidate_column(cc, var)?; + Ok((ProjectedColumn(column, name), cc.known_type_set(var))) } } +/// Returns two values: +/// - The `ColumnOrExpression` to use in the query. This will always refer to other +/// variables by name; never to a datoms column. +/// - The known type of that value. +fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueType)> { + let known_types = cc.known_type_set(&simple.var); + let return_type = simple.op.is_applicable_to_types(known_types)?; + let projected_column_or_expression = + if let Some(value) = cc.bound_value(&simple.var) { + // Oh, we already know the value! + if simple.use_static_value() { + // We can statically compute the aggregate result for some operators -- not count or + // sum, but avg/max/min are OK. + ColumnOrExpression::Value(value) + } else { + let expression = Expression::Unary { + sql_op: simple.op.to_sql(), + arg: ColumnOrExpression::Value(value), + }; + ColumnOrExpression::Expression(Box::new(expression), return_type) + } + } else { + // The common case: the values are bound during execution. + let name = VariableColumn::Variable(simple.var.clone()).column_name(); + let expression = Expression::Unary { + sql_op: simple.op.to_sql(), + arg: ColumnOrExpression::ExistingColumn(name), + }; + ColumnOrExpression::Expression(Box::new(expression), return_type) + }; + Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type)) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SimpleAggregationOp { + Avg, + Count, + Max, + Min, + Sum, +} + +impl SimpleAggregationOp { + fn to_sql(&self) -> &'static str { + use SimpleAggregationOp::*; + match self { + &Avg => "avg", + &Count => "count", + &Max => "max", + &Min => "min", + &Sum => "sum", + } + } + + fn for_function(function: &QueryFunction) -> Option { + match function.0.plain_name() { + "avg" => Some(SimpleAggregationOp::Avg), + "count" => Some(SimpleAggregationOp::Count), + "max" => Some(SimpleAggregationOp::Max), + "min" => Some(SimpleAggregationOp::Min), + "sum" => Some(SimpleAggregationOp::Sum), + _ => None, + } + } + + /// With knowledge of the types to which a variable might be bound, + /// return a `Result` to determine whether this aggregation is suitable. + /// For example, it's valid to take the `Avg` of `{Double, Long}`, invalid + /// to take `Sum` of `{Instant}`, valid to take (lexicographic) `Max` of `{String}`, + /// but invalid to take `Max` of `{Uuid, String}`. + /// + /// The returned type is the type of the result of the aggregation. + fn is_applicable_to_types(&self, possibilities: ValueTypeSet) -> Result { + use SimpleAggregationOp::*; + if possibilities.is_empty() { + bail!(ErrorKind::CannotProjectImpossibleBinding(*self)) + } + + match self { + // One can always count results. + &Count => Ok(ValueType::Long), + + // Only numeric types can be averaged or summed. + &Avg => { + if possibilities.is_only_numeric() { + // The mean of a set of numeric values will always, for our purposes, be a double. + Ok(ValueType::Double) + } else { + bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities)) + } + }, + &Sum => { + if possibilities.is_only_numeric() { + if possibilities.contains(ValueType::Double) { + Ok(ValueType::Double) + } else { + // TODO: BigInt. + Ok(ValueType::Long) + } + } else { + bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities)) + } + }, + + &Max | &Min => { + if possibilities.is_unit() { + use ValueType::*; + let the_type = possibilities.exemplar().expect("a type"); + match the_type { + // These types are numerically ordered. + Double | Long | Instant => Ok(the_type), + + // Boolean: false < true. + Boolean => Ok(the_type), + + // String: lexicographic order. + String => Ok(the_type), + + // These types are unordered. + Keyword | Ref | Uuid => { + bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities)) + }, + } + } else { + // It cannot be empty -- we checked. + // The only types that are valid to compare cross-type are numbers. + if possibilities.is_only_numeric() { + // Note that if the max/min is a Long, it will be returned as a Double! + if possibilities.contains(ValueType::Double) { + Ok(ValueType::Double) + } else { + // TODO: BigInt. + Ok(ValueType::Long) + } + } else { + bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities)) + } + } + }, + } + } +} + +struct SimpleAggregate { + op: SimpleAggregationOp, + var: Variable, +} + +impl SimpleAggregate { + fn column_name(&self) -> Name { + format!("({} {})", self.op.to_sql(), self.var.name()) + } + + fn use_static_value(&self) -> bool { + use SimpleAggregationOp::*; + match self.op { + Avg | Max | Min => true, + Count | Sum => false, + } + } +} + +trait SimpleAggregation { + fn to_simple(&self) -> Option; +} + +impl SimpleAggregation for Aggregate { + fn to_simple(&self) -> Option { + if self.args.len() != 1 { + return None; + } + self.args[0] + .as_variable() + .and_then(|v| SimpleAggregationOp::for_function(&self.func) + .map(|op| SimpleAggregate { op, var: v.clone(), })) + } +} + +/// An internal temporary struct to pass between the projection 'walk' and the +/// resultant projector. +/// Projection accumulates four things: +/// - Two SQL projection lists. We need two because aggregate queries are nested +/// in order to apply DISTINCT to values prior to aggregation. +/// - A collection of templates for the projector to use to extract values. +/// - A list of columns to use for grouping. Grouping is a property of the projection! +struct ProjectedElements { + sql_projection: Projection, + pre_aggregate_projection: Option, + templates: Vec, + group_by: Vec, +} + /// Walk an iterator of `Element`s, collecting projector templates and columns. /// -/// Returns a pair: the SQL projection (which should always be a `Projection::Columns`) +/// Returns a `ProjectedElements`, which combines SQL projections /// and a `Vec` of `TypedIndex` 'keys' to use when looking up values. /// /// Callers must ensure that every `Element` is distinct -- a query like @@ -334,26 +602,56 @@ pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (Proj fn project_elements<'a, I: IntoIterator>( count: usize, elements: I, - query: &AlgebraicQuery) -> Result<(Projection, Vec)> { + query: &AlgebraicQuery) -> Result { + + // Give a little padding for type tags. + let mut inner_projection = Vec::with_capacity(count + 2); + + // Everything in the outer query will _either_ be an aggregate operation + // _or_ a reference to a name projected from the inner. + // We'll expand them later. + let mut outer_projection: Vec> = Vec::with_capacity(count + 2); - let mut cols = Vec::with_capacity(count); let mut i: i32 = 0; + let mut min_max_count: usize = 0; + let mut corresponding_count: usize = 0; let mut templates = vec![]; - let mut with = query.with.clone(); + + let mut aggregates = false; + + // Any variable that appears intact in the :find clause, not inside an aggregate expression. + // "Query variables not in aggregate expressions will group the results and appear intact + // in the result." + // We use an ordered set here so that we group in the correct order. + let mut outer_variables = IndexSet::new(); + + // Any variable that we are projecting from the inner query. + let mut inner_variables = BTreeSet::new(); for e in elements { + if let &Element::Corresponding(_) = e { + corresponding_count += 1; + } + match e { // Each time we come across a variable, we push a SQL column // into the SQL projection, aliased to the name of the variable, // and we push an annotated index into the projector. - &Element::Variable(ref var) => { - // If we're projecting this, we don't need it in :with. - with.remove(var); + &Element::Variable(ref var) | + &Element::Corresponding(ref var) => { + if outer_variables.contains(var) { + eprintln!("Warning: duplicate variable {} in query.", var); + } - let (projected_column, maybe_type) = projected_column_for_var(&var, &query.cc); - cols.push(projected_column); - if let Some(ty) = maybe_type { - let tag = ty.value_type_tag(); + // TODO: it's an error to have `[:find ?x (the ?x) …]`. + outer_variables.insert(var.clone()); + inner_variables.insert(var.clone()); + + let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?; + outer_projection.push(Either::Left(projected_column.1.clone())); + inner_projection.push(projected_column); + + if let Some(tag) = type_set.unique_type_tag() { templates.push(TypedIndex::Known(i, tag)); i += 1; // We used one SQL column. } else { @@ -362,25 +660,213 @@ fn project_elements<'a, I: IntoIterator>( // Also project the type from the SQL query. let (type_column, type_name) = candidate_type_column(&query.cc, &var); - cols.push(ProjectedColumn(type_column, type_name)); + inner_projection.push(ProjectedColumn(type_column, type_name.clone())); + outer_projection.push(Either::Left(type_name)); } + }, + &Element::Aggregate(ref a) => { + if let Some(simple) = a.to_simple() { + aggregates = true; + + use SimpleAggregationOp::*; + match simple.op { + Max | Min => { + min_max_count += 1; + }, + Avg | Count | Sum => (), + } + + // When we encounter a simple aggregate -- one in which the aggregation can be + // implemented in SQL, on a single variable -- we just push the SQL aggregation op. + // We must ensure the following: + // - There's a column for the var. + // - The type of the var is known to be restricted to a sensible input set + // (not necessarily a single type, but e.g., all vals must be Double or Long). + // - The type set must be appropriate for the operation. E.g., `Sum` is not a + // meaningful operation on instants. + + let (projected_column, return_type) = projected_column_for_simple_aggregate(&simple, &query.cc)?; + outer_projection.push(Either::Right(projected_column)); + + if !inner_variables.contains(&simple.var) { + inner_variables.insert(simple.var.clone()); + let (projected_column, _type_set) = projected_column_for_var(&simple.var, &query.cc)?; + inner_projection.push(projected_column); + if query.cc.known_type_set(&simple.var).unique_type_tag().is_none() { + // Also project the type from the SQL query. + let (type_column, type_name) = candidate_type_column(&query.cc, &simple.var); + inner_projection.push(ProjectedColumn(type_column, type_name.clone())); + } + } + + // We might regret using the type tag here instead of the `ValueType`. + templates.push(TypedIndex::Known(i, return_type.value_type_tag())); + i += 1; + } else { + // TODO: complex aggregates. + bail!(ErrorKind::NotYetImplemented("complex aggregates".into())); + } + }, + } + } + + match (min_max_count, corresponding_count) { + (0, 0) | (_, 0) => {}, + (0, _) => { + eprintln!("Warning: used `(the ?var)` without `min` or `max`."); + }, + (1, _) => { + // This is the success case! + }, + (n, c) => { + bail!(ErrorKind::AmbiguousAggregates(n, c)); + }, + } + + // Anything used in ORDER BY (which we're given in `named_projection`) + // needs to be in the SQL column list so we can refer to it by name. + // + // They don't affect projection. + // + // If a variable is of a non-fixed type, also project the type tag column, so we don't + // accidentally unify across types when considering uniqueness! + for var in query.named_projection.iter() { + if outer_variables.contains(var) { + continue; + } + + // If it's a fixed value, we need do nothing further. + if query.cc.is_value_bound(&var) { + continue; + } + + let already_inner = inner_variables.contains(&var); + let (column, name) = candidate_column(&query.cc, &var)?; + if !already_inner { + inner_projection.push(ProjectedColumn(column, name.clone())); + inner_variables.insert(var.clone()); + } + + outer_projection.push(Either::Left(name)); + outer_variables.insert(var.clone()); + + // We don't care if a column has a single _type_, we care if it has a single type _tag_, + // because that's what we'll use if we're projecting. E.g., Long and Double. + // Single type implies single type tag, and is cheaper, so we check that first. + let types = query.cc.known_type_set(&var); + if !types.has_unique_type_tag() { + let (type_column, type_name) = candidate_type_column(&query.cc, &var); + if !already_inner { + inner_projection.push(ProjectedColumn(type_column, type_name.clone())); + } + + outer_projection.push(Either::Left(type_name)); + } + } + + if !aggregates { + // We're done -- we never need to group unless we're aggregating. + return Ok(ProjectedElements { + sql_projection: Projection::Columns(inner_projection), + pre_aggregate_projection: None, + templates, + group_by: vec![], + }); + } + + // OK, on to aggregates. + // We need to produce two SQL projection lists: one for an inner query and one for the outer. + // + // The inner serves these purposes: + // - Projecting variables to avoid duplicates being elided. (:with) + // - Making bindings available to the outermost query for projection, ordering, and grouping. + // + // The outer is consumed by the projector. + // + // We will also be producing: + // - A GROUP BY list to group the output of the inner query by non-aggregate variables + // so that it can be correctly aggregated. + + // Turn this collection of vars into a collection of columns from the query. + // We don't allow grouping on anything but a variable bound in the query. + // We group by tag if necessary. + let mut group_by = Vec::with_capacity(outer_variables.len() + 2); + for var in outer_variables.into_iter() { + if query.cc.is_value_bound(&var) { + continue; + } + + // The GROUP BY goes outside, but it needs every variable and type tag to be + // projected from inside. Collect in both directions here. + let name = VariableColumn::Variable(var.clone()).column_name(); + group_by.push(GroupBy::ProjectedColumn(name)); + + let needs_type_projection = !query.cc.known_type_set(&var).has_unique_type_tag(); + + let already_inner = inner_variables.contains(&var); + if !already_inner { + let (column, name) = candidate_column(&query.cc, &var)?; + inner_projection.push(ProjectedColumn(column, name.clone())); + } + + if needs_type_projection { + let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name(); + if !already_inner { + let type_col = query.cc + .extracted_types + .get(&var) + .cloned() + .ok_or_else(|| ErrorKind::NoTypeAvailableForVariable(var.name().clone()))?; + inner_projection.push(ProjectedColumn(ColumnOrExpression::Column(type_col), type_name.clone())); + } + group_by.push(GroupBy::ProjectedColumn(type_name)); + }; + } + + for var in query.with.iter() { + // We never need to project a constant. + if query.cc.is_value_bound(&var) { + continue; + } + + // We don't need to add inner projections for :with if they are already there. + if !inner_variables.contains(&var) { + let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?; + inner_projection.push(projected_column); + + if type_set.unique_type_tag().is_none() { + // Also project the type from the SQL query. + let (type_column, type_name) = candidate_type_column(&query.cc, &var); + inner_projection.push(ProjectedColumn(type_column, type_name.clone())); } } } - for var in with { - // We need to collect these into the SQL column list, but they don't affect projection. - // If a variable is of a non-fixed type, also project the type tag column, so we don't - // accidentally unify across types when considering uniqueness! - let (column, name) = candidate_column(&query.cc, &var); - cols.push(ProjectedColumn(column, name)); - if query.cc.known_type(&var).is_none() { - let (type_column, type_name) = candidate_type_column(&query.cc, &var); - cols.push(ProjectedColumn(type_column, type_name)); - } - } + // At this point we know we have a double-layer projection. Collect the outer. + // + // If we have an inner and outer layer, the inner layer will name its + // variables, and the outer will re-project them. + // If we only have one layer, then the outer will do the naming. + // (We could try to not use names in the inner query, but then what would we do for + // `ground` and known values?) + // Walk the projection, switching the outer columns to use the inner names. - Ok((Projection::Columns(cols), templates)) + let outer_projection = outer_projection.into_iter().map(|c| { + match c { + Either::Left(name) => { + ProjectedColumn(ColumnOrExpression::ExistingColumn(name.clone()), + name) + }, + Either::Right(pc) => pc, + } + }).collect(); + + Ok(ProjectedElements { + sql_projection: Projection::Columns(outer_projection), + pre_aggregate_projection: Some(Projection::Columns(inner_projection)), + templates, + group_by, + }) } pub trait Projector { @@ -436,12 +922,14 @@ impl ScalarProjector { } } - fn combine(spec: Rc, sql: Projection, mut templates: Vec) -> Result { - let template = templates.pop().expect("Expected a single template"); + fn combine(spec: Rc, mut elements: ProjectedElements) -> Result { + let template = elements.templates.pop().expect("Expected a single template"); Ok(CombinedProjection { - sql_projection: sql, + sql_projection: elements.sql_projection, + pre_aggregate_projection: elements.pre_aggregate_projection, datalog_projector: Box::new(ScalarProjector::with_template(spec, template)), distinct: false, + group_by_cols: elements.group_by, }) } } @@ -486,6 +974,8 @@ impl TupleProjector { // This is exactly the same as for rel. fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result> { // There will be at least as many SQL columns as Datalog columns. + // gte 'cos we might be querying extra columns for ordering. + // The templates will take care of ignoring columns. assert!(row.column_count() >= self.len as i32); self.templates .iter() @@ -493,12 +983,14 @@ impl TupleProjector { .collect::>>() } - fn combine(spec: Rc, column_count: usize, sql: Projection, templates: Vec) -> Result { - let p = TupleProjector::with_templates(spec, column_count, templates); + fn combine(spec: Rc, column_count: usize, elements: ProjectedElements) -> Result { + let p = TupleProjector::with_templates(spec, column_count, elements.templates); Ok(CombinedProjection { - sql_projection: sql, + sql_projection: elements.sql_projection, + pre_aggregate_projection: elements.pre_aggregate_projection, datalog_projector: Box::new(p), distinct: false, + group_by_cols: elements.group_by, }) } } @@ -546,6 +1038,8 @@ impl RelProjector { fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result> { // There will be at least as many SQL columns as Datalog columns. + // gte 'cos we might be querying extra columns for ordering. + // The templates will take care of ignoring columns. assert!(row.column_count() >= self.len as i32); self.templates .iter() @@ -553,12 +1047,21 @@ impl RelProjector { .collect::>>() } - fn combine(spec: Rc, column_count: usize, sql: Projection, templates: Vec) -> Result { - let p = RelProjector::with_templates(spec, column_count, templates); + fn combine(spec: Rc, column_count: usize, elements: ProjectedElements) -> Result { + let p = RelProjector::with_templates(spec, column_count, elements.templates); + + // If every column yields only one value, or if this is an aggregate query + // (because by definition every column in an aggregate query is either + // aggregated or is a variable _upon which we group_), then don't bother + // with DISTINCT. + let already_distinct = elements.pre_aggregate_projection.is_some() || + p.columns().all(|e| e.is_unit()); Ok(CombinedProjection { - sql_projection: sql, + sql_projection: elements.sql_projection, + pre_aggregate_projection: elements.pre_aggregate_projection, datalog_projector: Box::new(p), - distinct: true, + distinct: !already_distinct, + group_by_cols: elements.group_by, }) } } @@ -597,12 +1100,22 @@ impl CollProjector { } } - fn combine(spec: Rc, sql: Projection, mut templates: Vec) -> Result { - let template = templates.pop().expect("Expected a single template"); + fn combine(spec: Rc, mut elements: ProjectedElements) -> Result { + let template = elements.templates.pop().expect("Expected a single template"); + let p = CollProjector::with_template(spec, template); + + // If every column yields only one value, or if this is an aggregate query + // (because by definition every column in an aggregate query is either + // aggregated or is a variable _upon which we group_), then don't bother + // with DISTINCT. + let already_distinct = elements.pre_aggregate_projection.is_some() || + p.columns().all(|e| e.is_unit()); Ok(CombinedProjection { - sql_projection: sql, - datalog_projector: Box::new(CollProjector::with_template(spec, template)), - distinct: true, + sql_projection: elements.sql_projection, + pre_aggregate_projection: elements.pre_aggregate_projection, + datalog_projector: Box::new(p), + distinct: !already_distinct, + group_by_cols: elements.group_by, }) } } @@ -626,19 +1139,33 @@ impl Projector for CollProjector { } } -/// Combines the two things you need to turn a query into SQL and turn its results into -/// `QueryResults`. +/// Combines the things you need to turn a query into SQL and turn its results into +/// `QueryResults`: SQL-related projection information (`DISTINCT`, columns, etc.) and +/// a Datalog projector that turns SQL into structures. pub struct CombinedProjection { /// A SQL projection, mapping columns mentioned in the body of the query to columns in the /// output. pub sql_projection: Projection, + /// If a query contains aggregates, we need to generate a nested subquery: an inner query + /// that returns our distinct variable bindings (and any `:with` vars), and an outer query + /// that applies aggregation. That's so we can put `DISTINCT` in the inner query and apply + /// aggregation afterwards -- `SELECT DISTINCT count(foo)` counts _then_ uniques, and we need + /// the opposite to implement Datalog distinct semantics. + /// If this is the case, `sql_projection` will be the outer query's projection list, and + /// `pre_aggregate_projection` will be the inner. + /// If the query doesn't use aggregation, this field will be `None`. + pub pre_aggregate_projection: Option, + /// A Datalog projection. This consumes rows of the appropriate shape (as defined by /// the SQL projection) to yield one of the four kinds of Datalog query result. pub datalog_projector: Box, /// True if this query requires the SQL query to include DISTINCT. pub distinct: bool, + + // A list of column names to use as a GROUP BY clause. + pub group_by_cols: Vec, } impl CombinedProjection { @@ -665,7 +1192,17 @@ pub fn query_projection(query: &AlgebraicQuery) -> Result = spec.columns().map(|e| match e { &Element::Variable(ref var) => var.clone() }).collect(); + let variables: BTreeSet = spec.columns() + .map(|e| match e { + &Element::Variable(ref var) | + &Element::Corresponding(ref var) => var.clone(), + &Element::Aggregate(ref _agg) => { + // TODO: static computation of aggregates, then + // implement the condition in `is_fully_bound`. + unreachable!(); + }, + }) + .collect(); // TODO: error handling let results = QueryOutput::from_constants(&spec, query.cc.value_bindings(&variables)); @@ -679,25 +1216,25 @@ pub fn query_projection(query: &AlgebraicQuery) -> Result { - let (cols, templates) = project_elements(1, iter::once(element), query)?; - CollProjector::combine(spec, cols, templates).map(|p| p.flip_distinct_for_limit(&query.limit)) + let elements = project_elements(1, iter::once(element), query)?; + CollProjector::combine(spec, elements).map(|p| p.flip_distinct_for_limit(&query.limit)) }, FindScalar(ref element) => { - let (cols, templates) = project_elements(1, iter::once(element), query)?; - ScalarProjector::combine(spec, cols, templates) + let elements = project_elements(1, iter::once(element), query)?; + ScalarProjector::combine(spec, elements) }, FindRel(ref elements) => { let column_count = query.find_spec.expected_column_count(); - let (cols, templates) = project_elements(column_count, elements, query)?; - RelProjector::combine(spec, column_count, cols, templates).map(|p| p.flip_distinct_for_limit(&query.limit)) + let elements = project_elements(column_count, elements, query)?; + RelProjector::combine(spec, column_count, elements).map(|p| p.flip_distinct_for_limit(&query.limit)) }, FindTuple(ref elements) => { let column_count = query.find_spec.expected_column_count(); - let (cols, templates) = project_elements(column_count, elements, query)?; - TupleProjector::combine(spec, column_count, cols, templates) + let elements = project_elements(column_count, elements, query)?; + TupleProjector::combine(spec, column_count, elements) }, }.map(Either::Right) } diff --git a/query-projector/tests/aggregates.rs b/query-projector/tests/aggregates.rs new file mode 100644 index 00000000..027f8ebe --- /dev/null +++ b/query-projector/tests/aggregates.rs @@ -0,0 +1,88 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate mentat_core; +extern crate mentat_query; +extern crate mentat_query_algebrizer; +extern crate mentat_query_parser; +extern crate mentat_query_projector; + +use mentat_core::{ + Attribute, + Entid, + Schema, + ValueType, +}; + +use mentat_query_parser::{ + parse_find_string, +}; + +use mentat_query::{ + NamespacedKeyword, +}; + +use mentat_query_algebrizer::{ + Known, + algebrize, +}; + +use mentat_query_projector::{ + query_projection, +}; + +// These are helpers that tests use to build Schema instances. +fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { + schema.entid_map.insert(e, i.clone()); + schema.ident_map.insert(i.clone(), e); +} + +fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { + schema.attribute_map.insert(e, a); +} + +fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, 65, Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 68, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 69, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema +} + +#[test] +fn test_aggregate_unsuitable_type() { + let schema = prepopulated_schema(); + + let query = r#"[:find (avg ?e) + :where + [?e :foo/age ?a]]"#; + + // While the query itself algebrizes and parses… + let parsed = parse_find_string(query).expect("query input to have parsed"); + let algebrized = algebrize(Known::for_schema(&schema), parsed).expect("query algebrizes"); + + // … when we look at the projection list, we cannot reconcile the types. + assert!(query_projection(&algebrized).is_err()); +} diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 3dfe413c..68a0790e 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -15,11 +15,11 @@ extern crate mentat_query_algebrizer; extern crate mentat_sql; use std::boxed::Box; - use mentat_core::{ Entid, - TypedValue, SQLTypeAffinity, + TypedValue, + ValueType, }; use mentat_query::{ @@ -57,10 +57,16 @@ use mentat_sql::{ /// implementation for each storage backend. Passing `TypedValue`s here allows for that. pub enum ColumnOrExpression { Column(QualifiedAlias), + ExistingColumn(Name), Entid(Entid), // Because it's so common. Integer(i32), // We use these for type codes etc. Long(i64), Value(TypedValue), + Expression(Box, ValueType), // Track the return type. +} + +pub enum Expression { + Unary { sql_op: &'static str, arg: ColumnOrExpression }, } /// `QueryValue` and `ColumnOrExpression` are almost identical… merge somehow? @@ -85,6 +91,26 @@ pub enum Projection { One, } +#[derive(Debug, PartialEq, Eq)] +pub enum GroupBy { + ProjectedColumn(Name), + QueryColumn(QualifiedAlias), + // TODO: non-projected expressions, etc. +} + +impl QueryFragment for GroupBy { + fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { + match self { + &GroupBy::ProjectedColumn(ref name) => { + out.push_identifier(name.as_str()) + }, + &GroupBy::QueryColumn(ref qa) => { + qualified_alias_push_sql(out, qa) + }, + } + } +} + #[derive(Copy, Clone)] pub struct Op(pub &'static str); // TODO: we can do better than this! @@ -190,6 +216,7 @@ pub struct SelectQuery { pub projection: Projection, pub from: FromClause, pub constraints: Vec, + pub group_by: Vec, pub order: Vec, pub limit: Limit, } @@ -262,10 +289,11 @@ impl QueryFragment for ColumnOrExpression { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { use self::ColumnOrExpression::*; match self { - &Column(QualifiedAlias(ref table, ref column)) => { - out.push_identifier(table.as_str())?; - out.push_sql("."); - push_column(out, column) + &Column(ref qa) => { + qualified_alias_push_sql(out, qa) + }, + &ExistingColumn(ref alias) => { + out.push_identifier(alias.as_str()) }, &Entid(entid) => { out.push_sql(entid.to_string().as_str()); @@ -282,6 +310,23 @@ impl QueryFragment for ColumnOrExpression { &Value(ref v) => { out.push_typed_value(v) }, + &Expression(ref e, _) => { + e.push_sql(out) + }, + } + } +} + +impl QueryFragment for Expression { + fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { + match self { + &Expression::Unary { ref sql_op, ref arg } => { + out.push_sql(sql_op); // No need to escape built-ins. + out.push_sql("("); + arg.push_sql(out)?; + out.push_sql(")"); + Ok(()) + }, } } } @@ -368,10 +413,8 @@ impl QueryFragment for Constraint { Ok(()) }, &NotExists { ref subquery } => { - out.push_sql("NOT EXISTS ("); - subquery.push_sql(out)?; - out.push_sql(")"); - Ok(()) + out.push_sql("NOT EXISTS "); + subquery.push_sql(out) }, &TypeCheck { ref value, ref affinity } => { out.push_sql("typeof("); @@ -397,6 +440,13 @@ impl QueryFragment for JoinOp { } } +// We don't own QualifiedAlias or QueryFragment, so we can't implement the trait. +fn qualified_alias_push_sql(out: &mut QueryBuilder, qa: &QualifiedAlias) -> BuildQueryResult { + out.push_identifier(qa.0.as_str())?; + out.push_sql("."); + push_column(out, &qa.1) +} + // We don't own SourceAlias or QueryFragment, so we can't implement the trait. fn source_alias_push_sql(out: &mut QueryBuilder, sa: &SourceAlias) -> BuildQueryResult { let &SourceAlias(ref table, ref alias) = sa; @@ -440,7 +490,10 @@ impl QueryFragment for TableOrSubquery { out.push_identifier(table_alias.as_str()) }, &Subquery(ref subquery) => { - subquery.push_sql(out) + out.push_sql("("); + subquery.push_sql(out)?; + out.push_sql(")"); + Ok(()) }, &Values(ref values, ref table_alias) => { // XXX: does this work for Values::Unnamed? @@ -545,6 +598,16 @@ impl QueryFragment for SelectQuery { { out.push_sql(" AND ") }); } + match &self.group_by { + group_by if !group_by.is_empty() => { + out.push_sql(" GROUP BY "); + interpose!(group, group_by, + { group.push_sql(out)? }, + { out.push_sql(", ") }); + }, + _ => {}, + } + if !self.order.is_empty() { out.push_sql(" ORDER BY "); interpose!(&OrderBy(ref dir, ref var), self.order, @@ -745,6 +808,7 @@ mod tests { right: ColumnOrExpression::Entid(65536), }, ], + group_by: vec![], order: vec![], limit: Limit::None, }; diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index a5594a80..c5bd4a0f 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -11,6 +11,7 @@ use mentat_core::{ SQLTypeAffinity, SQLValueType, + SQLValueTypeSet, TypedValue, ValueType, ValueTypeTag, @@ -56,6 +57,7 @@ use mentat_query_sql::{ ColumnOrExpression, Constraint, FromClause, + GroupBy, Op, ProjectedColumn, Projection, @@ -287,7 +289,8 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq // project it as the variable name. // E.g., SELECT datoms03.v AS `?x`. for var in projection.iter() { - let (projected_column, maybe_type) = projected_column_for_var(var, &cc); + // TODO: chain results out. + let (projected_column, type_set) = projected_column_for_var(var, &cc).expect("every var to be bound"); columns.push(projected_column); // Similarly, project type tags if they're not known conclusively in the @@ -295,10 +298,10 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq // Assumption: we'll never need to project a tag without projecting the value of a variable. if type_extraction.contains(var) { let expression = - if let Some(ty) = maybe_type { + if let Some(tag) = type_set.unique_type_tag() { // If we know the type for sure, just project the constant. // SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag` - ColumnOrExpression::Integer(ty.value_type_tag()) + ColumnOrExpression::Integer(tag) } else { // Otherwise, we'll have an established type binding! This'll be // either a datoms table or, recursively, a subquery. Project @@ -319,7 +322,7 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq // Each arm simply turns into a subquery. // The SQL translation will stuff "UNION" between each arm. let projection = Projection::Columns(columns); - cc_to_select_query(projection, cc, false, None, Limit::None) + cc_to_select_query(projection, cc, false, vec![], None, Limit::None) }).collect(), alias) }, @@ -340,6 +343,7 @@ fn empty_query() -> SelectQuery { distinct: false, projection: Projection::One, from: FromClause::Nothing, + group_by: vec![], constraints: vec![], order: vec![], limit: Limit::None, @@ -352,6 +356,7 @@ fn empty_query() -> SelectQuery { fn cc_to_select_query(projection: Projection, cc: ConjoiningClauses, distinct: bool, + group_by: Vec, order: Option>, limit: Limit) -> SelectQuery { let from = if cc.from.is_empty() { @@ -387,6 +392,7 @@ fn cc_to_select_query(projection: Projection, distinct: distinct, projection: projection, from: from, + group_by: group_by, constraints: cc.wheres .into_iter() .map(|c| c.to_constraint()) @@ -403,7 +409,31 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery { // In this case we can produce a very simple query that returns no results. empty_query() } else { - cc_to_select_query(Projection::One, cc, false, None, Limit::None) + cc_to_select_query(Projection::One, cc, false, vec![], None, Limit::None) + } +} + +/// Take a query and wrap it as a subquery of a new query with the provided projection list. +/// All limits, ordering, and grouping move to the outer query. The inner query is marked as +/// distinct. +fn re_project(mut inner: SelectQuery, projection: Projection) -> SelectQuery { + let outer_distinct = inner.distinct; + inner.distinct = true; + let group_by = inner.group_by; + inner.group_by = vec![]; + let order_by = inner.order; + inner.order = vec![]; + let limit = inner.limit; + inner.limit = Limit::None; + + SelectQuery { + distinct: outer_distinct, + projection: projection, + from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])), + constraints: vec![], + group_by: group_by, + order: order_by, + limit: limit, } } @@ -414,10 +444,30 @@ pub fn query_to_select(query: AlgebraicQuery) -> Result { // SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine. query_projection(&query).map(|e| match e { Either::Left(constant) => ProjectedSelect::Constant(constant), - Either::Right(CombinedProjection { sql_projection, datalog_projector, distinct, }) => { - let q = cc_to_select_query(sql_projection, query.cc, distinct, query.order, query.limit); + Either::Right(CombinedProjection { + sql_projection, + pre_aggregate_projection, + datalog_projector, + distinct, + group_by_cols, + }) => { ProjectedSelect::Query { - query: q, + query: match pre_aggregate_projection { + // If we know we need a nested query for aggregation, build that first. + Some(pre_aggregate) => { + let inner = cc_to_select_query(pre_aggregate, + query.cc, + distinct, + group_by_cols, + query.order, + query.limit); + let outer = re_project(inner, sql_projection); + outer + }, + None => { + cc_to_select_query(sql_projection, query.cc, distinct, group_by_cols, query.order, query.limit) + }, + }, projector: datalog_projector, } }, diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 8e11e9bb..bdcd91aa 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -662,13 +662,13 @@ fn test_with_without_aggregate() { // Known type. let query = r#"[:find ?x :with ?y :where [?x :foo/bar ?y]]"#; let SQLQuery { sql, args } = translate(&schema, query); - assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x`, `datoms00`.v AS `?y` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99"); assert_eq!(args, vec![]); // Unknown type. let query = r#"[:find ?x :with ?y :where [?x _ ?y]]"#; let SQLQuery { sql, args } = translate(&schema, query); - assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, `all_datoms00`.value_type_tag AS `?y_value_type_tag` FROM `all_datoms` AS `all_datoms00`"); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00`"); assert_eq!(args, vec![]); } @@ -1081,3 +1081,37 @@ fn test_instant_range() { AND `datoms00`.v > 1497574601257000"); assert_eq!(args, vec![]); } + +#[test] +fn test_project_aggregates() { + let schema = prepopulated_typed_schema(ValueType::Long); + let query = r#"[:find ?e (max ?t) + :where + [?e :foo/bar ?t]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + // No outer DISTINCT: we aggregate or group by every variable. + assert_eq!(sql, "SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \ + FROM \ + (SELECT DISTINCT \ + `datoms00`.e AS `?e`, \ + `datoms00`.v AS `?t` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99) \ + GROUP BY `?e`"); + assert_eq!(args, vec![]); + + let query = r#"[:find (max ?t) + :with ?e + :where + [?e :foo/bar ?t]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT max(`?t`) AS `(max ?t)` \ + FROM \ + (SELECT DISTINCT \ + `datoms00`.v AS `?t`, \ + `datoms00`.e AS `?e` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99)"); + assert_eq!(args, vec![]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index d041ca63..249158dc 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -153,6 +153,12 @@ impl QueryFunction { } } +impl std::fmt::Display for QueryFunction { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub enum Direction { Ascending, @@ -266,6 +272,26 @@ impl FromValue for FnArg { } } +// For display in column headings in the repl. +impl std::fmt::Display for FnArg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + &FnArg::Variable(ref var) => write!(f, "{}", var), + &FnArg::SrcVar(ref var) => { + if var == &SrcVar::DefaultSrc { + write!(f, "$") + } else { + write!(f, "{:?}", var) + } + }, + &FnArg::EntidOrInteger(entid) => write!(f, "{}", entid), + &FnArg::IdentOrKeyword(ref kw) => write!(f, "{}", kw), + &FnArg::Constant(ref constant) => write!(f, "{:?}", constant), + &FnArg::Vector(ref vec) => write!(f, "{:?}", vec), + } + } +} + impl FnArg { pub fn as_variable(&self) -> Option<&Variable> { match self { @@ -435,20 +461,37 @@ pub struct Pull { } */ -/* +#[derive(Debug, Eq, PartialEq)] pub struct Aggregate { - pub fn_name: String, + pub func: QueryFunction, pub args: Vec, } -*/ #[derive(Debug, Eq, PartialEq)] pub enum Element { Variable(Variable), - // Aggregate(Aggregate), // TODO + Aggregate(Aggregate), + + /// In a query with a `max` or `min` aggregate, a corresponding variable + /// (indicated in the query with `(the ?var)`, is guaranteed to come from + /// the row that provided the max or min value. Queries with more than one + /// `max` or `min` cannot yield predictable behavior, and will err during + /// algebrizing. + Corresponding(Variable), // Pull(Pull), // TODO } +impl Element { + /// Returns true if the element must yield only one value. + pub fn is_unit(&self) -> bool { + match self { + &Element::Variable(_) => false, + &Element::Aggregate(_) => true, + &Element::Corresponding(_) => true, + } + } +} + impl From for Element { fn from(x: Variable) -> Element { Element::Variable(x) @@ -461,6 +504,16 @@ impl std::fmt::Display for Element { &Element::Variable(ref var) => { write!(f, "{}", var) }, + &Element::Aggregate(ref agg) => { + match agg.args.len() { + 0 => write!(f, "({})", agg.func), + 1 => write!(f, "({} {})", agg.func, agg.args[0]), + _ => write!(f, "({} {:?})", agg.func, agg.args), + } + }, + &Element::Corresponding(ref var) => { + write!(f, "(the {})", var) + }, } } } diff --git a/src/conn.rs b/src/conn.rs index 7c88d8f3..9146d16f 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -173,6 +173,13 @@ impl Store { sqlite: connection, }) } + + pub fn transact(&mut self, transaction: &str) -> Result { + let mut ip = self.begin_transaction()?; + let report = ip.transact(transaction)?; + ip.commit()?; + Ok(report) + } } pub trait Queryable { diff --git a/tests/query.rs b/tests/query.rs index 13b73a62..044c7658 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -15,7 +15,11 @@ extern crate time; extern crate mentat; extern crate mentat_core; extern crate mentat_db; + +// TODO: when we switch to `failure`, make this more humane. extern crate mentat_query_algebrizer; // For errors. +extern crate mentat_query_projector; // For errors. +extern crate mentat_query_translator; // For errors. use std::str::FromStr; @@ -32,10 +36,13 @@ use mentat_core::{ }; use mentat::{ + IntoResult, NamespacedKeyword, PlainSymbol, QueryInputs, + Queryable, QueryResults, + Store, Variable, new_connection, }; @@ -381,7 +388,7 @@ fn test_fulltext() { _ => panic!("Unexpected results."), } }, - _ => panic!("Expected query to work."), + r => panic!("Unexpected results {:?}.", r), } let a = conn.transact(&mut c, r#"[[:db/add "a" :foo/term "talk"]]"#) @@ -522,7 +529,6 @@ fn test_lookup() { let fetched_many = conn.lookup_value_for_attribute(&c, *entid, &foo_many).unwrap().unwrap(); assert!(two_longs.contains(&fetched_many)); } - #[test] fn test_type_reqs() { let mut c = new_connection("").expect("Couldn't open conn."); @@ -621,3 +627,477 @@ fn test_type_reqs() { } }; } + +#[test] +fn test_monster_head_aggregates() { + let mut store = Store::open("").expect("opened"); + let mut in_progress = store.begin_transaction().expect("began"); + + in_progress.transact(r#"[ + {:db/ident :monster/heads + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one} + {:db/ident :monster/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/index true + :db/unique :db.unique/identity} + {:db/ident :monster/weapon + :db/valueType :db.type/string + :db/cardinality :db.cardinality/many} + ]"#).expect("transacted"); + + in_progress.transact(r#"[ + {:monster/heads 1 + :monster/name "Medusa" + :monster/weapon "Stony gaze"} + {:monster/heads 1 + :monster/name "Cyclops" + :monster/weapon ["Large club" "Mighty arms" "Stompy feet"]} + {:monster/heads 1 + :monster/name "Chimera" + :monster/weapon "Goat-like agility"} + {:monster/heads 3 + :monster/name "Cerberus" + :monster/weapon ["8-foot Kong®" "Deadly drool"]} + ]"#).expect("transacted"); + + // Without :with, uniqueness applies prior to aggregation, so we get 1 + 3 = 4. + let res = in_progress.q_once("[:find (sum ?heads) . :where [?monster :monster/heads ?heads]]", None) + .expect("results") + .into(); + match res { + QueryResults::Scalar(Some(TypedValue::Long(count))) => { + assert_eq!(count, 4); + }, + r => panic!("Unexpected result {:?}", r), + }; + + // With :with, uniqueness includes the monster, so we get 1 + 1 + 1 + 3 = 6. + let res = in_progress.q_once("[:find (sum ?heads) . :with ?monster :where [?monster :monster/heads ?heads]]", None) + .expect("results") + .into(); + match res { + QueryResults::Scalar(Some(TypedValue::Long(count))) => { + assert_eq!(count, 6); + }, + r => panic!("Unexpected result {:?}", r), + }; + + // Aggregates group. + let res = in_progress.q_once(r#"[:find ?name (count ?weapon) + :with ?monster + :order (asc ?name) + :where [?monster :monster/name ?name] + [?monster :monster/weapon ?weapon]]"#, + None) + .expect("results") + .into(); + match res { + QueryResults::Rel(vals) => { + let expected = vec![ + vec!["Cerberus".into(), TypedValue::Long(2)], + vec!["Chimera".into(), TypedValue::Long(1)], + vec!["Cyclops".into(), TypedValue::Long(3)], + vec!["Medusa".into(), TypedValue::Long(1)], + ]; + assert_eq!(vals, expected); + }, + r => panic!("Unexpected result {:?}", r), + }; + + in_progress.rollback().expect("rolled back"); +} + +#[test] +fn test_basic_aggregates() { + let mut store = Store::open("").expect("opened"); + + store.transact(r#"[ + {:db/ident :foo/is-vegetarian :db/valueType :db.type/boolean :db/cardinality :db.cardinality/one} + {:db/ident :foo/age :db/valueType :db.type/long :db/cardinality :db.cardinality/one} + {:db/ident :foo/name :db/valueType :db.type/string :db/cardinality :db.cardinality/one} + ]"#).unwrap(); + + let _ids = store.transact(r#"[ + [:db/add "a" :foo/name "Alice"] + [:db/add "b" :foo/name "Beli"] + [:db/add "c" :foo/name "Carlos"] + [:db/add "d" :foo/name "Diana"] + [:db/add "a" :foo/is-vegetarian true] + [:db/add "b" :foo/is-vegetarian true] + [:db/add "c" :foo/is-vegetarian false] + [:db/add "d" :foo/is-vegetarian false] + [:db/add "a" :foo/age 14] + [:db/add "b" :foo/age 22] + [:db/add "c" :foo/age 42] + [:db/add "d" :foo/age 28] + ]"#).unwrap().tempids; + + // Count the number of distinct bindings of `?veg` that are `true` -- namely, one. + // This is not the same as `count-distinct`: note the distinction between + // including `:with` and not. + // In this case, the `DISTINCT` must occur inside the aggregation, not outside it. + /* + Rather than: + + SELECT DISTINCT count(1) AS `(count ?veg)` + FROM `datoms` AS `datoms00` + WHERE `datoms00`.a = 65536 + AND `datoms00`.v = 1; + + our query should be + + SELECT DISTINCT count(`?veg`) AS `(count ?veg)` + FROM ( + SELECT DISTINCT 1 AS `?veg` + FROM `datoms` AS `datoms00` + WHERE `datoms00`.a = 65536 + AND `datoms00`.v = 1 + ); + */ + let r = store.q_once(r#"[:find (count ?veg) + :where + [_ :foo/is-vegetarian ?veg] + [(ground true) ?veg]]"#, None) + .expect("results") + .into(); + match r { + QueryResults::Rel(vals) => { + assert_eq!(vals, vec![vec![TypedValue::Long(1)]]); + }, + _ => panic!("Expected rel."), + } + + // And this should be + /* + SELECT DISTINCT count(`?veg`) AS `(count ?veg)` + FROM ( + SELECT DISTINCT 1 AS `?veg`, `datoms00`.e AS `?person` + FROM `datoms` AS `datoms00` + WHERE `datoms00`.a = 65536 + AND `datoms00`.v = 1 + ); + */ + let r = store.q_once(r#"[:find (count ?veg) . + :with ?person + :where + [?person :foo/is-vegetarian ?veg] + [(ground true) ?veg]]"#, None) + .expect("results") + .into(); + match r { + QueryResults::Scalar(Some(val)) => { + assert_eq!(val, TypedValue::Long(2)); + }, + _ => panic!("Expected scalar."), + } + + // What are the oldest and youngest ages? + let r = store.q_once(r#"[:find [(min ?age) (max ?age)] + :where + [_ :foo/age ?age]]"#, None) + .expect("results") + .into(); + match r { + QueryResults::Tuple(Some(vals)) => { + assert_eq!(vals, + vec![TypedValue::Long(14), + TypedValue::Long(42)]); + }, + _ => panic!("Expected tuple."), + } + + // Who's youngest, via order? + let r = store.q_once(r#"[:find [?name ?age] + :order (asc ?age) + :where + [?x :foo/age ?age] + [?x :foo/name ?name]]"#, None) + .expect("results") + .into(); + match r { + QueryResults::Tuple(Some(vals)) => { + assert_eq!(vals, + vec![TypedValue::String("Alice".to_string().into()), + TypedValue::Long(14)]); + }, + r => panic!("Unexpected results {:?}", r), + } + + // Who's oldest, via order? + let r = store.q_once(r#"[:find [?name ?age] + :order (desc ?age) + :where + [?x :foo/age ?age] + [?x :foo/name ?name]]"#, None) + .expect("results") + .into(); + match r { + QueryResults::Tuple(Some(vals)) => { + assert_eq!(vals, + vec![TypedValue::String("Carlos".to_string().into()), + TypedValue::Long(42)]); + }, + _ => panic!("Expected tuple."), + } + + // How many of each age do we have? + // Add an extra person to make this interesting. + store.transact(r#"[{:foo/name "Medusa", :foo/age 28}]"#).expect("transacted"); + + // If we omit the 'with', we'll get the wrong answer: + let r = store.q_once(r#"[:find ?age (count ?age) + :order (asc ?age) + :where [_ :foo/age ?age]]"#, None) + .expect("results") + .into(); + + match r { + QueryResults::Rel(vals) => { + assert_eq!(vals, vec![ + vec![TypedValue::Long(14), TypedValue::Long(1)], + vec![TypedValue::Long(22), TypedValue::Long(1)], + vec![TypedValue::Long(28), TypedValue::Long(1)], + vec![TypedValue::Long(42), TypedValue::Long(1)], + ]); + }, + _ => panic!("Expected rel."), + } + + // If we include it, we'll get the right one: + let r = store.q_once(r#"[:find ?age (count ?age) + :with ?person + :order (asc ?age) + :where [?person :foo/age ?age]]"#, None) + .expect("results") + .into(); + + match r { + QueryResults::Rel(vals) => { + assert_eq!(vals, vec![ + vec![TypedValue::Long(14), TypedValue::Long(1)], + vec![TypedValue::Long(22), TypedValue::Long(1)], + vec![TypedValue::Long(28), TypedValue::Long(2)], + vec![TypedValue::Long(42), TypedValue::Long(1)], + ]); + }, + _ => panic!("Expected rel."), + } +} + +#[test] +fn test_combinatorial() { + let mut store = Store::open("").expect("opened"); + + store.transact(r#"[ + [:db/add "a" :db/ident :foo/name] + [:db/add "a" :db/valueType :db.type/string] + [:db/add "a" :db/cardinality :db.cardinality/one] + [:db/add "b" :db/ident :foo/dance] + [:db/add "b" :db/valueType :db.type/ref] + [:db/add "b" :db/cardinality :db.cardinality/many] + [:db/add "b" :db/index true] + ]"#).unwrap(); + + store.transact(r#"[ + [:db/add "a" :foo/name "Alice"] + [:db/add "b" :foo/name "Beli"] + [:db/add "c" :foo/name "Carlos"] + [:db/add "d" :foo/name "Diana"] + + ;; Alice danced with Beli twice. + [:db/add "a" :foo/dance "ab"] + [:db/add "b" :foo/dance "ab"] + [:db/add "a" :foo/dance "ba"] + [:db/add "b" :foo/dance "ba"] + + ;; Carlos danced with Diana. + [:db/add "c" :foo/dance "cd"] + [:db/add "d" :foo/dance "cd"] + + ;; Alice danced with Diana. + [:db/add "a" :foo/dance "ad"] + [:db/add "d" :foo/dance "ad"] + + ]"#).unwrap(); + + // How many different pairings of dancers were there? + // If we just use `!=` (or `differ`), the number is doubled because of symmetry! + assert_eq!(TypedValue::Long(6), + store.q_once(r#"[:find (count ?right) . + :with ?left + :where + [?left :foo/dance ?dance] + [?right :foo/dance ?dance] + [(differ ?left ?right)]]"#, None) + .into_scalar_result() + .expect("scalar results").unwrap()); + + // SQL addresses this by using `<` instead of `!=` -- by imposing + // an order on values, we can ensure that each pair only appears once, not + // once per permutation. + // It's far from ideal to expose an ordering on entids, because developers + // will come to rely on it. Instead we expose a specific operator: `unpermute`. + // When used in a query that generates permuted pairs of references, this + // ensures that only one permutation is returned for a given pair. + assert_eq!(TypedValue::Long(3), + store.q_once(r#"[:find (count ?right) . + :with ?left + :where + [?left :foo/dance ?dance] + [?right :foo/dance ?dance] + [(unpermute ?left ?right)]]"#, None) + .into_scalar_result() + .expect("scalar results").unwrap()); +} + +#[test] +fn test_aggregation_implicit_grouping() { + let mut store = Store::open("").expect("opened"); + + store.transact(r#"[ + [:db/add "a" :db/ident :foo/score] + [:db/add "a" :db/valueType :db.type/long] + [:db/add "a" :db/cardinality :db.cardinality/one] + [:db/add "b" :db/ident :foo/name] + [:db/add "b" :db/valueType :db.type/string] + [:db/add "b" :db/cardinality :db.cardinality/one] + [:db/add "c" :db/ident :foo/is-vegetarian] + [:db/add "c" :db/valueType :db.type/boolean] + [:db/add "c" :db/cardinality :db.cardinality/one] + [:db/add "d" :db/ident :foo/play] + [:db/add "d" :db/valueType :db.type/ref] + [:db/add "d" :db/cardinality :db.cardinality/many] + [:db/add "d" :db/index true] + [:db/add "d" :db/unique :db.unique/value] + ]"#).unwrap(); + + let ids = store.transact(r#"[ + [:db/add "a" :foo/name "Alice"] + [:db/add "b" :foo/name "Beli"] + [:db/add "c" :foo/name "Carlos"] + [:db/add "d" :foo/name "Diana"] + [:db/add "a" :foo/is-vegetarian true] + [:db/add "b" :foo/is-vegetarian true] + [:db/add "c" :foo/is-vegetarian false] + [:db/add "d" :foo/is-vegetarian false] + [:db/add "aa" :foo/score 14] + [:db/add "ab" :foo/score 99] + [:db/add "ac" :foo/score 14] + [:db/add "ba" :foo/score 22] + [:db/add "bb" :foo/score 11] + [:db/add "ca" :foo/score 42] + [:db/add "da" :foo/score 5] + [:db/add "db" :foo/score 28] + [:db/add "d" :foo/play "da"] + [:db/add "d" :foo/play "db"] + [:db/add "a" :foo/play "aa"] + [:db/add "a" :foo/play "ab"] + [:db/add "a" :foo/play "ac"] + [:db/add "b" :foo/play "ba"] + [:db/add "b" :foo/play "bb"] + [:db/add "c" :foo/play "ca"] + ]"#).unwrap().tempids; + + // How many different scores were there? + assert_eq!(TypedValue::Long(7), + store.q_once(r#"[:find (count ?score) . + :where + [?game :foo/score ?score]]"#, None) + .into_scalar_result() + .expect("scalar results").unwrap()); + + // How many different games resulted in scores? + // '14' appears twice. + assert_eq!(TypedValue::Long(8), + store.q_once(r#"[:find (count ?score) . + :with ?game + :where + [?game :foo/score ?score]]"#, None) + .into_scalar_result() + .expect("scalar results").unwrap()); + + // Who's the highest-scoring vegetarian? + assert_eq!(vec!["Alice".into(), TypedValue::Long(99)], + store.q_once(r#"[:find [(the ?name) (max ?score)] + :where + [?game :foo/score ?score] + [?person :foo/play ?game] + [?person :foo/is-vegetarian true] + [?person :foo/name ?name]]"#, None) + .into_tuple_result() + .expect("tuple results").unwrap()); + + // We can't run an ambiguous correspondence. + let res = store.q_once(r#"[:find [(the ?name) (min ?score) (max ?score)] + :where + [?game :foo/score ?score] + [?person :foo/play ?game] + [?person :foo/is-vegetarian true] + [?person :foo/name ?name]]"#, None); + match res { + Result::Err( + Error( + ErrorKind::TranslatorError( + ::mentat_query_translator::ErrorKind::ProjectorError( + ::mentat_query_projector::ErrorKind::AmbiguousAggregates(mmc, cc) + ) + ), _)) => { + assert_eq!(mmc, 2); + assert_eq!(cc, 1); + }, + r => { + panic!("Unexpected result {:?}.", r); + }, + } + + // Max scores for vegetarians. + assert_eq!(vec![vec!["Alice".into(), TypedValue::Long(99)], + vec!["Beli".into(), TypedValue::Long(22)]], + store.q_once(r#"[:find ?name (max ?score) + :where + [?game :foo/score ?score] + [?person :foo/play ?game] + [?person :foo/is-vegetarian true] + [?person :foo/name ?name]]"#, None) + .into_rel_result() + .expect("rel results")); + + // We can combine these aggregates. + let r = store.q_once(r#"[:find ?x ?name (max ?score) (count ?score) (avg ?score) + :with ?game ; So we don't discard duplicate scores! + :where + [?x :foo/name ?name] + [?x :foo/play ?game] + [?game :foo/score ?score]]"#, None) + .expect("results") + .into(); + match r { + QueryResults::Rel(vals) => { + assert_eq!(vals, + vec![ + vec![TypedValue::Ref(ids.get("a").cloned().unwrap()), + TypedValue::String("Alice".to_string().into()), + TypedValue::Long(99), + TypedValue::Long(3), + TypedValue::Double((127f64 / 3f64).into())], + vec![TypedValue::Ref(ids.get("b").cloned().unwrap()), + TypedValue::String("Beli".to_string().into()), + TypedValue::Long(22), + TypedValue::Long(2), + TypedValue::Double((33f64 / 2f64).into())], + vec![TypedValue::Ref(ids.get("c").cloned().unwrap()), + TypedValue::String("Carlos".to_string().into()), + TypedValue::Long(42), + TypedValue::Long(1), + TypedValue::Double(42f64.into())], + vec![TypedValue::Ref(ids.get("d").cloned().unwrap()), + TypedValue::String("Diana".to_string().into()), + TypedValue::Long(28), + TypedValue::Long(2), + TypedValue::Double((33f64 / 2f64).into())]]); + }, + x => panic!("Got unexpected results {:?}", x), + } +}