Compare commits

...

8 commits

Author SHA1 Message Date
Richard Newman
a7b10872e0 Failing test. 2017-11-30 15:03:04 -08:00
Richard Newman
e2e9fae660 DEBUG 2017-11-30 15:03:04 -08:00
Richard Newman
4aa647ddc5 Newtype VariableIterator. 2017-11-30 15:03:04 -08:00
Richard Newman
5fe3d50762 Review comment. 2017-11-30 15:03:04 -08:00
Richard Newman
8e5d7830ee Review comments. 2017-11-30 15:03:04 -08:00
Richard Newman
971e166779 Review comments for simple aggregation. 2017-11-30 15:03:04 -08:00
Richard Newman
3eb898566b Add commented-out failing tests for type code expansion. 2017-11-30 15:03:04 -08:00
Richard Newman
de4d58f614 Parse and algebrize simple aggregates. (#312) 2017-11-30 15:03:04 -08:00
13 changed files with 1005 additions and 87 deletions

View file

@ -342,6 +342,12 @@ impl ValueTypeSet {
} }
} }
impl ValueTypeSet {
    /// Return `true` if every type in this set is one of the numeric types, i.e., if this set
    /// is a subset of `ValueTypeSet::of_numeric_types()`.
    ///
    /// NOTE(review): callers that care about the empty set check `is_empty()` separately,
    /// which suggests an empty set is reported as numeric here -- confirm against `is_subset`.
    pub fn is_only_numeric(&self) -> bool {
        let numeric = ValueTypeSet::of_numeric_types();
        self.is_subset(&numeric)
    }
}
impl IntoIterator for ValueTypeSet { impl IntoIterator for ValueTypeSet {
type Item = ValueType; type Item = ValueType;
type IntoIter = ::enum_set::Iter<ValueType>; type IntoIter = ::enum_set::Iter<ValueType>;
@ -367,10 +373,16 @@ impl ::std::iter::Extend<ValueType> for ValueTypeSet {
} }
} }
/// We have an enum of types, `ValueType`. It can be collected into a set, `ValueTypeSet`. Each type
/// is associated with a type tag, which is how a type is represented in, e.g., SQL storage. Types
/// can share type tags, because backing SQL storage is able to differentiate between some types
/// (e.g., longs and doubles), and so distinct tags aren't necessary. That association is defined by
/// `SQLValueType`. That trait similarly extends to `ValueTypeSet`, which maps a collection of types
/// into a collection of tags.
pub trait SQLValueTypeSet { pub trait SQLValueTypeSet {
fn value_type_tags(&self) -> BTreeSet<ValueTypeTag>; fn value_type_tags(&self) -> BTreeSet<ValueTypeTag>;
fn has_unique_type_code(&self) -> bool; fn has_unique_type_tag(&self) -> bool;
fn unique_type_code(&self) -> Option<ValueTypeTag>; fn unique_type_tag(&self) -> Option<ValueTypeTag>;
} }
impl SQLValueTypeSet for ValueTypeSet { impl SQLValueTypeSet for ValueTypeSet {
@ -383,15 +395,15 @@ impl SQLValueTypeSet for ValueTypeSet {
out out
} }
fn unique_type_code(&self) -> Option<ValueTypeTag> { fn unique_type_tag(&self) -> Option<ValueTypeTag> {
if self.is_unit() || self.has_unique_type_code() { if self.is_unit() || self.has_unique_type_tag() {
self.exemplar().map(|t| t.value_type_tag()) self.exemplar().map(|t| t.value_type_tag())
} else { } else {
None None
} }
} }
fn has_unique_type_code(&self) -> bool { fn has_unique_type_tag(&self) -> bool {
if self.is_unit() { if self.is_unit() {
return true; return true;
} }

View file

@ -264,6 +264,18 @@ impl Default for ConjoiningClauses {
} }
} }
/// An iterator over the variables that are externally bound to values.
///
/// This is a newtype around the key iterator of a `BTreeMap<Variable, TypedValue>`, so that
/// signatures that return it don't have to spell out the underlying map iterator type.
pub struct VariableIterator<'a>(
    ::std::collections::btree_map::Keys<'a, Variable, TypedValue>,
);

impl<'a> Iterator for VariableIterator<'a> {
    type Item = &'a Variable;

    /// Yield the next variable from the wrapped key iterator.
    fn next(&mut self) -> Option<&'a Variable> {
        self.0.next()
    }

    /// Forward the wrapped iterator's bounds. Without this the default `(0, None)` would be
    /// reported, hiding the exact remaining length that the map's key iterator knows and
    /// preventing consumers such as `collect` from sizing their output up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
impl ConjoiningClauses { impl ConjoiningClauses {
/// Construct a new `ConjoiningClauses` with the provided alias counter. This allows a caller /// Construct a new `ConjoiningClauses` with the provided alias counter. This allows a caller
/// to share a counter with an enclosing scope, and to start counting at a particular offset /// to share a counter with an enclosing scope, and to start counting at a particular offset
@ -370,13 +382,13 @@ impl ConjoiningClauses {
} }
/// Return an iterator over the variables externally bound to values. /// Return an iterator over the variables externally bound to values.
pub fn value_bound_variables(&self) -> ::std::collections::btree_map::Keys<Variable, TypedValue> { pub fn value_bound_variables(&self) -> VariableIterator {
self.value_bindings.keys() VariableIterator(self.value_bindings.keys())
} }
/// Return a set of the variables externally bound to values. /// Return a set of the variables externally bound to values.
pub fn value_bound_variable_set(&self) -> BTreeSet<Variable> { pub fn value_bound_variable_set(&self) -> BTreeSet<Variable> {
self.value_bindings.keys().cloned().collect() self.value_bound_variables().cloned().collect()
} }
/// Return a single `ValueType` if the given variable is known to have a precise type. /// Return a single `ValueType` if the given variable is known to have a precise type.

View file

@ -127,7 +127,7 @@ impl ConjoiningClauses {
if shared_types == ValueTypeSet::of_one(ValueType::Instant) { if shared_types == ValueTypeSet::of_one(ValueType::Instant) {
left_v = self.resolve_instant_argument(&predicate.operator, 0, left)?; left_v = self.resolve_instant_argument(&predicate.operator, 0, left)?;
right_v = self.resolve_instant_argument(&predicate.operator, 1, right)?; right_v = self.resolve_instant_argument(&predicate.operator, 1, right)?;
} else if !shared_types.is_empty() && shared_types.is_subset(&ValueTypeSet::of_numeric_types()) { } else if !shared_types.is_empty() && shared_types.is_only_numeric() {
left_v = self.resolve_numeric_argument(&predicate.operator, 0, left)?; left_v = self.resolve_numeric_argument(&predicate.operator, 0, left)?;
right_v = self.resolve_numeric_argument(&predicate.operator, 1, right)?; right_v = self.resolve_numeric_argument(&predicate.operator, 1, right)?;
} else { } else {

View file

@ -62,8 +62,19 @@ pub use types::{
pub struct AlgebraicQuery { pub struct AlgebraicQuery {
default_source: SrcVar, default_source: SrcVar,
pub find_spec: FindSpec, pub find_spec: FindSpec,
has_aggregates: bool,
/// The set of variables that the caller wishes to be used for grouping when aggregating.
/// These are specified in the query input, as `:with`, and are then chewed up during projection.
/// If no variables are supplied, then no additional grouping is necessary beyond the
/// non-aggregated projection list.
pub with: BTreeSet<Variable>, pub with: BTreeSet<Variable>,
/// Some query features, such as ordering, are implemented by implicit reference to SQL columns.
/// In order for these references to be 'live', those columns must be projected.
/// This is the set of variables that must be so projected.
/// This is not necessarily every variable that will be so required -- some variables
/// will already be in the projection list.
pub named_projection: BTreeSet<Variable>,
pub order: Option<Vec<OrderBy>>, pub order: Option<Vec<OrderBy>>,
pub limit: Limit, pub limit: Limit,
pub cc: clauses::ConjoiningClauses, pub cc: clauses::ConjoiningClauses,
@ -187,15 +198,14 @@ pub fn algebrize_with_inputs(schema: &Schema,
cc.prune_extracted_types(); cc.prune_extracted_types();
let (order, extra_vars) = validate_and_simplify_order(&cc, parsed.order)?; let (order, extra_vars) = validate_and_simplify_order(&cc, parsed.order)?;
let with: BTreeSet<Variable> = parsed.with.into_iter().chain(extra_vars.into_iter()).collect();
// This might leave us with an unused `:in` variable. // This might leave us with an unused `:in` variable.
let limit = if parsed.find_spec.is_unit_limited() { Limit::Fixed(1) } else { parsed.limit }; let limit = if parsed.find_spec.is_unit_limited() { Limit::Fixed(1) } else { parsed.limit };
let q = AlgebraicQuery { let q = AlgebraicQuery {
default_source: parsed.default_source, default_source: parsed.default_source,
find_spec: parsed.find_spec, find_spec: parsed.find_spec,
has_aggregates: false, // TODO: we don't parse them yet. with: parsed.with,
with: with, named_projection: extra_vars,
order: order, order: order,
limit: limit, limit: limit,
cc: cc, cc: cc,

View file

@ -38,6 +38,7 @@ use self::mentat_parser_utils::value_and_span::{
}; };
use self::mentat_query::{ use self::mentat_query::{
Aggregate,
Binding, Binding,
Direction, Direction,
Element, Element,
@ -270,6 +271,13 @@ def_parser!(Query, func, (QueryFunction, Vec<FnArg>), {
(Query::query_function(), Query::arguments()) (Query::query_function(), Query::arguments())
}); });
// Parser for an aggregate expression, producing an `Aggregate`.
// `Query::func()` yields a `(QueryFunction, Vec<FnArg>)` pair; `seq().of_exactly(...)` applies it
// to the contents of a nested sequence, and the pair is stored as the `func` and `args` fields.
// NOTE(review): presumably this matches forms such as `(max ?x)` -- confirm against the parser tests.
def_parser!(Query, aggregate, Aggregate, {
seq().of_exactly(Query::func())
.map(|(func, args)| Aggregate {
func, args,
})
});
/// A vector containing just a parenthesized filter expression. /// A vector containing just a parenthesized filter expression.
def_parser!(Where, pred, WhereClause, { def_parser!(Where, pred, WhereClause, {
// Accept either a nested list or a nested vector here: // Accept either a nested list or a nested vector here:
@ -376,6 +384,7 @@ def_matches_plain_symbol!(Find, placeholder, "_");
def_parser!(Find, elem, Element, { def_parser!(Find, elem, Element, {
Query::variable().map(Element::Variable) Query::variable().map(Element::Variable)
.or(Query::aggregate().map(Element::Aggregate))
}); });
def_parser!(Find, find_scalar, FindSpec, { def_parser!(Find, find_scalar, FindSpec, {

View file

@ -19,7 +19,9 @@ extern crate mentat_query_algebrizer;
extern crate mentat_query_sql; extern crate mentat_query_sql;
extern crate mentat_sql; extern crate mentat_sql;
use std::collections::BTreeSet;
use std::iter; use std::iter;
use rusqlite::{ use rusqlite::{
Row, Row,
Rows, Rows,
@ -39,9 +41,12 @@ use mentat_db::{
}; };
use mentat_query::{ use mentat_query::{
Aggregate,
Element, Element,
FindSpec, FindSpec,
Limit, Limit,
PlainSymbol,
QueryFunction,
Variable, Variable,
}; };
@ -49,11 +54,14 @@ use mentat_query_algebrizer::{
AlgebraicQuery, AlgebraicQuery,
ColumnName, ColumnName,
ConjoiningClauses, ConjoiningClauses,
QualifiedAlias,
VariableColumn, VariableColumn,
}; };
use mentat_query_sql::{ use mentat_query_sql::{
ColumnOrExpression, ColumnOrExpression,
Expression,
GroupBy,
Name, Name,
Projection, Projection,
ProjectedColumn, ProjectedColumn,
@ -64,6 +72,31 @@ error_chain! {
Error, ErrorKind, ResultExt, Result; Error, ErrorKind, ResultExt, Result;
} }
errors {
/// We're just not done yet. Message that the feature is recognized but not yet
/// implemented.
NotYetImplemented(t: String) {
description("not yet implemented")
display("not yet implemented: {}", t)
}
/// An aggregate operation was applied to a variable whose set of possible types is empty,
/// so there is nothing that could be projected.
CannotProjectImpossibleBinding(op: SimpleAggregationOp) {
description("no possible types for variable in projection list")
display("no possible types for value provided to {:?}", op)
}
/// The aggregate operation is not meaningful for the types the variable might take;
/// e.g., `Sum` over instants.
CannotApplyAggregateOperationToTypes(op: SimpleAggregationOp, types: ValueTypeSet) {
description("cannot apply projection operation to types")
display("cannot apply projection operation {:?} to types {:?}", op, types)
}
/// A variable that needs a SQL column has no column binding in the query.
UnboundVariable(var: PlainSymbol) {
description("cannot project unbound variable")
display("cannot project unbound variable {:?}", var)
}
/// A variable without a unique type tag must be grouped by its type column, but no
/// extracted type column is available for it.
NoTypeAvailableForVariable(var: PlainSymbol) {
description("cannot find type for variable")
display("cannot find type for variable {:?}", var)
}
}
foreign_links { foreign_links {
Rusqlite(rusqlite::Error); Rusqlite(rusqlite::Error);
} }
@ -133,16 +166,16 @@ impl TypedIndex {
/// Look up this index and type(index) pair in the provided row. /// Look up this index and type(index) pair in the provided row.
/// This function will panic if: /// This function will panic if:
/// ///
/// - This is an `Unknown` and the retrieved type code isn't an i32. /// - This is an `Unknown` and the retrieved type tag isn't an i32.
/// - If the retrieved value can't be coerced to a rusqlite `Value`. /// - If the retrieved value can't be coerced to a rusqlite `Value`.
/// - Either index is out of bounds. /// - Either index is out of bounds.
/// ///
/// Because we construct our SQL projection list, the code that stored the data, and this /// Because we construct our SQL projection list, the code that stored the data, and this
/// consumer, a panic here implies that we have a bad bug — we put data of a very wrong type in /// consumer, a panic here implies that we have a bad bug — we put data of a very wrong type in
/// a row, and thus can't coerce to Value, we're retrieving from the wrong place, or our /// a row, and thus can't coerce to Value, we're retrieving from the wrong place, or our
/// generated SQL is junk. /// generated SQL is junk.
/// ///
/// This function will return a runtime error if the type code is unknown, or the value is /// This function will return a runtime error if the type tag is unknown, or the value is
/// otherwise not convertible by the DB layer. /// otherwise not convertible by the DB layer.
fn lookup<'a, 'stmt>(&self, row: &Row<'a, 'stmt>) -> Result<TypedValue> { fn lookup<'a, 'stmt>(&self, row: &Row<'a, 'stmt>) -> Result<TypedValue> {
use TypedIndex::*; use TypedIndex::*;
@ -161,17 +194,22 @@ impl TypedIndex {
} }
} }
fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) { fn cc_column(cc: &ConjoiningClauses, var: &Variable) -> Result<QualifiedAlias> {
cc.column_bindings
.get(var)
.and_then(|cols| cols.get(0).cloned())
.ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into())
}
fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> {
// Every variable should be bound by the top-level CC to at least // Every variable should be bound by the top-level CC to at least
// one column in the query. If that constraint is violated it's a // one column in the query. If that constraint is violated it's a
// bug in our code, so it's appropriate to panic here. // bug in our code, so it's appropriate to panic here.
let columns = cc.column_bindings cc_column(cc, var)
.get(var) .map(|qa| {
.expect(format!("Every variable should have a binding, but {:?} does not", var).as_str()); let name = VariableColumn::Variable(var.clone()).column_name();
(ColumnOrExpression::Column(qa), name)
let qa = columns[0].clone(); })
let name = VariableColumn::Variable(var.clone()).column_name();
(ColumnOrExpression::Column(qa), name)
} }
fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) { fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) {
@ -183,21 +221,207 @@ fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExp
} }
/// Return the projected column -- that is, a value or SQL column and an associated name -- for a /// Return the projected column -- that is, a value or SQL column and an associated name -- for a
/// given variable. Also return the type, if known. /// given variable. Also return the type.
/// Callers are expected to determine whether to project a type tag as an additional SQL column. /// Callers are expected to determine whether to project a type tag as an additional SQL column.
pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (ProjectedColumn, Option<ValueType>) { pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueTypeSet)> {
if let Some(value) = cc.bound_value(&var) { if let Some(value) = cc.bound_value(&var) {
// If we already know the value, then our lives are easy. // If we already know the value, then our lives are easy.
let tag = value.value_type(); let tag = value.value_type();
let name = VariableColumn::Variable(var.clone()).column_name(); let name = VariableColumn::Variable(var.clone()).column_name();
(ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), Some(tag)) Ok((ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), ValueTypeSet::of_one(tag)))
} else { } else {
// If we don't, then the CC *must* have bound the variable. // If we don't, then the CC *must* have bound the variable.
let (column, name) = candidate_column(cc, var); let (column, name) = candidate_column(cc, var)?;
(ProjectedColumn(column, name), cc.known_type(var)) Ok((ProjectedColumn(column, name), cc.known_type_set(var)))
} }
} }
/// Return the projected column for a simple aggregate -- an aggregate over a single variable
/// that can be expressed directly as a SQL function call -- along with the type of its result.
///
/// Fails if the operation can't be applied to the types the variable might take, or if the
/// variable is neither bound to a value nor bound to a column by the CC.
fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueType)> {
    // Check applicability first: there's no point finding a column for an invalid aggregate.
    let return_type = simple.op.is_applicable_to_types(cc.known_type_set(&simple.var))?;

    // Decide what the aggregate operates on, and whether it needs to be applied at all.
    let (arg, skip_aggregation) = match cc.bound_value(&simple.var) {
        // We already know the value. For some operators -- avg/max/min, but not count or
        // sum -- the aggregate result can be computed statically: it's the value itself.
        Some(value) => (ColumnOrExpression::Value(value), simple.use_static_value()),

        // The common case: the values are bound during execution.
        None => (ColumnOrExpression::Column(cc_column(cc, &simple.var)?), false),
    };

    let projected = if skip_aggregation {
        arg
    } else {
        let aggregation = Expression::Unary {
            sql_op: simple.op.to_sql(),
            arg: arg,
        };
        ColumnOrExpression::Expression(Box::new(aggregation), return_type)
    };

    Ok((ProjectedColumn(projected, simple.column_name()), return_type))
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SimpleAggregationOp {
Avg,
Count,
Max,
Min,
Sum,
}
impl SimpleAggregationOp {
fn to_sql(&self) -> &'static str {
use SimpleAggregationOp::*;
match self {
&Avg => "avg",
&Count => "count",
&Max => "max",
&Min => "min",
&Sum => "sum",
}
}
fn for_function(function: &QueryFunction) -> Option<SimpleAggregationOp> {
match function.0.plain_name() {
"avg" => Some(SimpleAggregationOp::Avg),
"count" => Some(SimpleAggregationOp::Count),
"max" => Some(SimpleAggregationOp::Max),
"min" => Some(SimpleAggregationOp::Min),
"sum" => Some(SimpleAggregationOp::Sum),
_ => None,
}
}
/// With knowledge of the types to which a variable might be bound,
/// return a `Result` to determine whether this aggregation is suitable.
/// For example, it's valid to take the `Avg` of `{Double, Long}`, invalid
/// to take `Sum` of `{Instant}`, valid to take (lexicographic) `Max` of `{String}`,
/// but invalid to take `Max` of `{Uuid, String}`.
///
/// The returned type is the type of the result of the aggregation.
fn is_applicable_to_types(&self, possibilities: ValueTypeSet) -> Result<ValueType> {
use SimpleAggregationOp::*;
if possibilities.is_empty() {
bail!(ErrorKind::CannotProjectImpossibleBinding(*self))
}
match self {
// One can always count results.
&Count => Ok(ValueType::Long),
// Only numeric types can be averaged or summed.
&Avg => {
if possibilities.is_only_numeric() {
// The mean of a set of numeric values will always, for our purposes, be a double.
Ok(ValueType::Double)
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
},
&Sum => {
if possibilities.is_only_numeric() {
if possibilities.contains(ValueType::Double) {
Ok(ValueType::Double)
} else {
// TODO: BigInt.
Ok(ValueType::Long)
}
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
},
&Max | &Min => {
if possibilities.is_unit() {
use ValueType::*;
let the_type = possibilities.exemplar().expect("a type");
match the_type {
// These types are numerically ordered.
Double | Long | Instant => Ok(the_type),
// Boolean: false < true.
Boolean => Ok(the_type),
// String: lexicographic order.
String => Ok(the_type),
// These types are unordered.
Keyword | Ref | Uuid => {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
},
}
} else {
// It cannot be empty -- we checked.
// The only types that are valid to compare cross-type are numbers.
if possibilities.is_only_numeric() {
// Note that if the max/min is a Long, it will be returned as a Double!
if possibilities.contains(ValueType::Double) {
Ok(ValueType::Double)
} else {
// TODO: BigInt.
Ok(ValueType::Long)
}
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
}
},
}
}
}
struct SimpleAggregate {
op: SimpleAggregationOp,
var: Variable,
}
impl SimpleAggregate {
fn column_name(&self) -> Name {
format!("({} {})", self.op.to_sql(), self.var.name())
}
fn use_static_value(&self) -> bool {
use SimpleAggregationOp::*;
match self.op {
Avg | Max | Min => true,
Count | Sum => false,
}
}
}
/// Conversion of a parsed aggregate into a `SimpleAggregate`, where that's possible.
trait SimpleAggregation {
    /// Return the simple form of this aggregate, or `None` if it isn't simple.
    fn to_simple(&self) -> Option<SimpleAggregate>;
}

impl SimpleAggregation for Aggregate {
    /// An aggregate is simple if it has exactly one argument, that argument is a variable,
    /// and its function names a `SimpleAggregationOp`.
    fn to_simple(&self) -> Option<SimpleAggregate> {
        let sole_arg = match self.args.len() {
            1 => &self.args[0],
            _ => return None,
        };

        let var = match sole_arg.as_variable() {
            Some(v) => v,
            None => return None,
        };

        // Only clone the variable once we know the function is one we can handle.
        SimpleAggregationOp::for_function(&self.func)
            .map(|op| SimpleAggregate { op: op, var: var.clone() })
    }
}
/// An internal temporary struct to pass between the projection 'walk' and the
/// resultant projector.
/// Projection accumulates three things:
/// - A SQL projection list.
/// - A collection of templates for the projector to use to extract values.
/// - A list of columns to use for grouping. Grouping is a property of the projection!
struct ProjectedElements {
// The columns (or expressions) to appear in the SQL projection list.
sql_projection: Projection,
// One entry per projected element, describing where in a result row to find its value
// and how to determine that value's type.
templates: Vec<TypedIndex>,
// The columns to group by. Empty when the projection contains no aggregates.
group_by: Vec<GroupBy>,
}
/// Walk an iterator of `Element`s, collecting projector templates and columns. /// Walk an iterator of `Element`s, collecting projector templates and columns.
/// ///
/// Returns a pair: the SQL projection (which should always be a `Projection::Columns`) /// Returns a pair: the SQL projection (which should always be a `Projection::Columns`)
@ -213,26 +437,44 @@ pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (Proj
fn project_elements<'a, I: IntoIterator<Item = &'a Element>>( fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
count: usize, count: usize,
elements: I, elements: I,
query: &AlgebraicQuery) -> Result<(Projection, Vec<TypedIndex>)> { query: &AlgebraicQuery) -> Result<ProjectedElements> {
let mut cols = Vec::with_capacity(count); let mut cols = Vec::with_capacity(count);
let mut i: i32 = 0; let mut i: i32 = 0;
let mut templates = vec![]; let mut templates = vec![];
let mut with = query.with.clone();
// "Query variables not in aggregate expressions will group the results and appear intact
// in the result."
// Compute the set of variables projected by the query, then subtract
// those used in aggregate expressions. This will be our GROUP BY clause.
// The GROUP BY clause should begin with any non-projected :with variables, in order,
// then the non-aggregated projected variables, in order.
// Predetermined:
// extras: variables needed for ORDER BY. query.named_projection.
// with: variables to be used for grouping. query.with.
//
// Accumulated:
// variables: variables in the projection list.
// aggregated: variables used in aggregates.
// Results:
// group_by: (with + variables) - aggregated
// extra_projection: (with + extras) - variables
let mut aggregated = BTreeSet::new();
let mut variables = BTreeSet::new();
for e in elements { for e in elements {
match e { match e {
// Each time we come across a variable, we push a SQL column // Each time we come across a variable, we push a SQL column
// into the SQL projection, aliased to the name of the variable, // into the SQL projection, aliased to the name of the variable,
// and we push an annotated index into the projector. // and we push an annotated index into the projector.
&Element::Variable(ref var) => { &Element::Variable(ref var) => {
// If we're projecting this, we don't need it in :with. variables.insert(var.clone());
with.remove(var);
let (projected_column, maybe_type) = projected_column_for_var(&var, &query.cc); let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
cols.push(projected_column); cols.push(projected_column);
if let Some(ty) = maybe_type { if let Some(tag) = type_set.unique_type_tag() {
let tag = ty.value_type_tag();
templates.push(TypedIndex::Known(i, tag)); templates.push(TypedIndex::Known(i, tag));
i += 1; // We used one SQL column. i += 1; // We used one SQL column.
} else { } else {
@ -243,23 +485,114 @@ fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
let (type_column, type_name) = candidate_type_column(&query.cc, &var); let (type_column, type_name) = candidate_type_column(&query.cc, &var);
cols.push(ProjectedColumn(type_column, type_name)); cols.push(ProjectedColumn(type_column, type_name));
} }
} },
&Element::Aggregate(ref a) => {
if let Some(simple) = a.to_simple() {
aggregated.insert(simple.var.clone());
// When we encounter a simple aggregate -- one in which the aggregation can be
// implemented in SQL, on a single variable -- we just push the SQL aggregation op.
// We must ensure the following:
// - There's a column for the var.
// - The type of the var is known to be restricted to a sensible input set
// (not necessarily a single type, but e.g., all vals must be Double or Long).
// - The type set must be appropriate for the operation. E.g., `Sum` is not a
// meaningful operation on instants.
let (projected_column, return_type) = projected_column_for_simple_aggregate(&simple, &query.cc)?;
cols.push(projected_column);
// We might regret using the type tag here instead of the `ValueType`.
templates.push(TypedIndex::Known(i, return_type.value_type_tag()));
i += 1;
} else {
// TODO: complex aggregates.
bail!(ErrorKind::NotYetImplemented("complex aggregates".into()));
}
},
} }
} }
for var in with { // Anything we're projecting, or that's part of an aggregate, doesn't need to be in GROUP BY.
// We need to collect these into the SQL column list, but they don't affect projection. //
// If a variable is of a non-fixed type, also project the type tag column, so we don't // Anything used in ORDER BY (which we're given in `named_projection`)
// accidentally unify across types when considering uniqueness! // needs to be in the SQL column list so we can refer to it by name.
let (column, name) = candidate_column(&query.cc, &var); //
// They don't affect projection.
//
// If a variable is of a non-fixed type, also project the type tag column, so we don't
// accidentally unify across types when considering uniqueness!
// Similarly, the type tag needs to be grouped.
// extra_projection: extras - variables
for var in query.named_projection.iter() {
if variables.contains(var) {
continue;
}
// If it's a fixed value, we need do nothing further.
if query.cc.is_value_bound(&var) {
continue;
}
let (column, name) = candidate_column(&query.cc, &var)?;
cols.push(ProjectedColumn(column, name)); cols.push(ProjectedColumn(column, name));
if query.cc.known_type(&var).is_none() {
// We don't care if a column has a single _type_, we care if it has a single type _tag_,
// because that's what we'll use if we're projecting. E.g., Long and Double.
// Single type implies single type tag, and is cheaper, so we check that first.
let types = query.cc.known_type_set(&var);
if !types.has_unique_type_tag() {
let (type_column, type_name) = candidate_type_column(&query.cc, &var); let (type_column, type_name) = candidate_type_column(&query.cc, &var);
cols.push(ProjectedColumn(type_column, type_name)); cols.push(ProjectedColumn(type_column, type_name));
} }
} }
Ok((Projection::Columns(cols), templates)) if aggregated.is_empty() {
// We're done -- we never need to group unless we're aggregating.
return Ok(ProjectedElements {
sql_projection: Projection::Columns(cols),
templates,
group_by: vec![],
});
}
// group_by: (with + variables) - aggregated
let mut group_by_vars: BTreeSet<Variable> = query.with.union(&variables).cloned().collect();
for var in aggregated.iter() {
group_by_vars.remove(var);
}
// We never need to group by a constant.
for var in query.cc.value_bound_variables() {
group_by_vars.remove(&var);
}
// Turn this collection of vars into a collection of columns from the query.
// We don't allow grouping on anything but a variable bound in the query.
// We group by tag if necessary.
let mut group_by = Vec::with_capacity(2 * group_by_vars.len());
for var in group_by_vars {
let types = query.cc.known_type_set(&var);
if !types.has_unique_type_tag() {
// Group by type then SQL value.
let type_col = query.cc
.extracted_types
.get(&var)
.cloned()
.map(GroupBy::QueryColumn)
.ok_or_else(|| ErrorKind::NoTypeAvailableForVariable(var.name().clone()))?;
group_by.push(type_col);
}
let val_col = cc_column(&query.cc, &var).map(GroupBy::QueryColumn)?;
group_by.push(val_col);
}
Ok(ProjectedElements {
sql_projection: Projection::Columns(cols),
templates,
group_by,
})
} }
pub trait Projector { pub trait Projector {
@ -295,12 +628,13 @@ impl ScalarProjector {
} }
} }
fn combine(sql: Projection, mut templates: Vec<TypedIndex>) -> Result<CombinedProjection> { fn combine(mut elements: ProjectedElements) -> Result<CombinedProjection> {
let template = templates.pop().expect("Expected a single template"); let template = elements.templates.pop().expect("Expected a single template");
Ok(CombinedProjection { Ok(CombinedProjection {
sql_projection: sql, sql_projection: elements.sql_projection,
datalog_projector: Box::new(ScalarProjector::with_template(template)), datalog_projector: Box::new(ScalarProjector::with_template(template)),
distinct: false, distinct: false,
group_by_cols: elements.group_by,
}) })
} }
} }
@ -333,19 +667,22 @@ impl TupleProjector {
// This is exactly the same as for rel. // This is exactly the same as for rel.
fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result<Vec<TypedValue>> { fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result<Vec<TypedValue>> {
assert_eq!(row.column_count(), self.len as i32); // gte 'cos we might be querying extra columns for ordering.
// The templates will take care of ignoring columns.
assert!(row.column_count() >= self.len as i32);
self.templates self.templates
.iter() .iter()
.map(|ti| ti.lookup(&row)) .map(|ti| ti.lookup(&row))
.collect::<Result<Vec<TypedValue>>>() .collect::<Result<Vec<TypedValue>>>()
} }
fn combine(column_count: usize, sql: Projection, templates: Vec<TypedIndex>) -> Result<CombinedProjection> { fn combine(column_count: usize, elements: ProjectedElements) -> Result<CombinedProjection> {
let p = TupleProjector::with_templates(column_count, templates); let p = TupleProjector::with_templates(column_count, elements.templates);
Ok(CombinedProjection { Ok(CombinedProjection {
sql_projection: sql, sql_projection: elements.sql_projection,
datalog_projector: Box::new(p), datalog_projector: Box::new(p),
distinct: false, distinct: false,
group_by_cols: elements.group_by,
}) })
} }
} }
@ -381,19 +718,22 @@ impl RelProjector {
} }
fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result<Vec<TypedValue>> { fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result<Vec<TypedValue>> {
assert_eq!(row.column_count(), self.len as i32); // gte 'cos we might be querying extra columns for ordering.
// The templates will take care of ignoring columns.
assert!(row.column_count() >= self.len as i32);
self.templates self.templates
.iter() .iter()
.map(|ti| ti.lookup(&row)) .map(|ti| ti.lookup(&row))
.collect::<Result<Vec<TypedValue>>>() .collect::<Result<Vec<TypedValue>>>()
} }
fn combine(column_count: usize, sql: Projection, templates: Vec<TypedIndex>) -> Result<CombinedProjection> { fn combine(column_count: usize, elements: ProjectedElements) -> Result<CombinedProjection> {
let p = RelProjector::with_templates(column_count, templates); let p = RelProjector::with_templates(column_count, elements.templates);
Ok(CombinedProjection { Ok(CombinedProjection {
sql_projection: sql, sql_projection: elements.sql_projection,
datalog_projector: Box::new(p), datalog_projector: Box::new(p),
distinct: true, distinct: true,
group_by_cols: elements.group_by,
}) })
} }
} }
@ -423,12 +763,13 @@ impl CollProjector {
} }
} }
fn combine(sql: Projection, mut templates: Vec<TypedIndex>) -> Result<CombinedProjection> { fn combine(mut elements: ProjectedElements) -> Result<CombinedProjection> {
let template = templates.pop().expect("Expected a single template"); let template = elements.templates.pop().expect("Expected a single template");
Ok(CombinedProjection { Ok(CombinedProjection {
sql_projection: sql, sql_projection: elements.sql_projection,
datalog_projector: Box::new(CollProjector::with_template(template)), datalog_projector: Box::new(CollProjector::with_template(template)),
distinct: true, distinct: true,
group_by_cols: elements.group_by,
}) })
} }
} }
@ -458,6 +799,9 @@ pub struct CombinedProjection {
/// True if this query requires the SQL query to include DISTINCT. /// True if this query requires the SQL query to include DISTINCT.
pub distinct: bool, pub distinct: bool,
// A list of column names to use as a GROUP BY clause.
pub group_by_cols: Vec<GroupBy>,
} }
impl CombinedProjection { impl CombinedProjection {
@ -488,29 +832,30 @@ pub fn query_projection(query: &AlgebraicQuery) -> Result<CombinedProjection> {
sql_projection: Projection::One, sql_projection: Projection::One,
datalog_projector: Box::new(constant_projector), datalog_projector: Box::new(constant_projector),
distinct: false, distinct: false,
group_by_cols: vec![],
}) })
} else { } else {
match query.find_spec { match query.find_spec {
FindColl(ref element) => { FindColl(ref element) => {
let (cols, templates) = project_elements(1, iter::once(element), query)?; let e = project_elements(1, iter::once(element), query)?;
CollProjector::combine(cols, templates).map(|p| p.flip_distinct_for_limit(&query.limit)) CollProjector::combine(e).map(|p| p.flip_distinct_for_limit(&query.limit))
}, },
FindScalar(ref element) => { FindScalar(ref element) => {
let (cols, templates) = project_elements(1, iter::once(element), query)?; let e = project_elements(1, iter::once(element), query)?;
ScalarProjector::combine(cols, templates) ScalarProjector::combine(e)
}, },
FindRel(ref elements) => { FindRel(ref elements) => {
let column_count = query.find_spec.expected_column_count(); let column_count = query.find_spec.expected_column_count();
let (cols, templates) = project_elements(column_count, elements, query)?; let e = project_elements(column_count, elements, query)?;
RelProjector::combine(column_count, cols, templates).map(|p| p.flip_distinct_for_limit(&query.limit)) RelProjector::combine(column_count, e).map(|p| p.flip_distinct_for_limit(&query.limit))
}, },
FindTuple(ref elements) => { FindTuple(ref elements) => {
let column_count = query.find_spec.expected_column_count(); let column_count = query.find_spec.expected_column_count();
let (cols, templates) = project_elements(column_count, elements, query)?; let e = project_elements(column_count, elements, query)?;
TupleProjector::combine(column_count, cols, templates) TupleProjector::combine(column_count, e)
}, },
} }
} }

View file

@ -0,0 +1,87 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
extern crate mentat_core;
extern crate mentat_query;
extern crate mentat_query_algebrizer;
extern crate mentat_query_parser;
extern crate mentat_query_projector;
use mentat_core::{
Attribute,
Entid,
Schema,
ValueType,
};
use mentat_query_parser::{
parse_find_string,
};
use mentat_query::{
NamespacedKeyword,
};
use mentat_query_algebrizer::{
algebrize,
};
use mentat_query_projector::{
query_projection,
};
// These are helpers that tests use to build Schema instances.
/// Registers the ident `i` for entid `e` in both directions: `e` -> `i` in
/// `schema.entid_map` and `i` -> `e` in `schema.ident_map`.
fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) {
    schema.entid_map.insert(e, i.clone());
    // `i` is owned and not used again, so move it rather than cloning a second time.
    schema.ident_map.insert(i, e);
}
/// Stores attribute definition `a` in the schema's attribute map under entid `e`.
fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) {
    let attributes = &mut schema.schema_map;
    attributes.insert(e, a);
}
/// Builds the schema shared by the tests in this file. It defines three
/// single-valued attributes in the `foo` namespace: `:foo/name` (entid 65,
/// string), `:foo/age` (entid 68, long) and `:foo/height` (entid 69, long).
fn prepopulated_schema() -> Schema {
    let mut schema = Schema::default();

    // (name within the `foo` namespace, entid, value type)
    let definitions = vec![
        ("name", 65, ValueType::String),
        ("age", 68, ValueType::Long),
        ("height", 69, ValueType::Long),
    ];

    for (name, entid, value_type) in definitions {
        associate_ident(&mut schema, NamespacedKeyword::new("foo", name), entid);
        add_attribute(&mut schema, entid, Attribute {
            value_type: value_type,
            multival: false,
            ..Default::default()
        });
    }

    schema
}
/// Averaging over `?e`, which is bound in the entity position of the pattern,
/// must be rejected when the projection is built rather than at parse or
/// algebrize time.
#[test]
fn test_aggregate_unsuitable_type() {
    let schema = prepopulated_schema();

    let input = r#"[:find (avg ?e)
:where
[?e :foo/age ?a]]"#;

    // Parsing and algebrizing both succeed for this query…
    let algebrized = algebrize(&schema,
                               parse_find_string(input).expect("query input to have parsed"))
        .expect("query algebrizes");

    // … but the types cannot be reconciled once we build the projection list.
    let projection = query_projection(&algebrized);
    assert!(projection.is_err());
}

View file

@ -15,10 +15,10 @@ extern crate mentat_query_algebrizer;
extern crate mentat_sql; extern crate mentat_sql;
use std::boxed::Box; use std::boxed::Box;
use mentat_core::{ use mentat_core::{
Entid, Entid,
TypedValue, TypedValue,
ValueType,
}; };
use mentat_query::{ use mentat_query::{
@ -60,6 +60,11 @@ pub enum ColumnOrExpression {
Integer(i32), // We use these for type codes etc. Integer(i32), // We use these for type codes etc.
Long(i64), Long(i64),
Value(TypedValue), Value(TypedValue),
Expression(Box<Expression>, ValueType), // Track the return type.
}
pub enum Expression {
Unary { sql_op: &'static str, arg: ColumnOrExpression },
} }
/// `QueryValue` and `ColumnOrExpression` are almost identical… merge somehow? /// `QueryValue` and `ColumnOrExpression` are almost identical… merge somehow?
@ -84,6 +89,26 @@ pub enum Projection {
One, One,
} }
/// A single term of a SQL `GROUP BY` clause, as collected in `SelectQuery::group_by`.
#[derive(PartialEq, Eq)]
pub enum GroupBy {
/// Group by a column referred to by its name alone; rendered as an escaped identifier.
ProjectedColumn(Name),
/// Group by a table-qualified column; rendered as `table.column`.
QueryColumn(QualifiedAlias),
// TODO: non-projected expressions, etc.
}
impl QueryFragment for GroupBy {
    /// Writes this grouping term to `out`: a projected column is emitted as an
    /// identifier, a query column as its qualified alias.
    fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult {
        match *self {
            GroupBy::ProjectedColumn(ref name) => out.push_identifier(name.as_str()),
            GroupBy::QueryColumn(ref qa) => qualified_alias_push_sql(out, qa),
        }
    }
}
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct Op(pub &'static str); // TODO: we can do better than this! pub struct Op(pub &'static str); // TODO: we can do better than this!
@ -185,6 +210,7 @@ pub struct SelectQuery {
pub projection: Projection, pub projection: Projection,
pub from: FromClause, pub from: FromClause,
pub constraints: Vec<Constraint>, pub constraints: Vec<Constraint>,
pub group_by: Vec<GroupBy>,
pub order: Vec<OrderBy>, pub order: Vec<OrderBy>,
pub limit: Limit, pub limit: Limit,
} }
@ -257,10 +283,8 @@ impl QueryFragment for ColumnOrExpression {
fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult {
use self::ColumnOrExpression::*; use self::ColumnOrExpression::*;
match self { match self {
&Column(QualifiedAlias(ref table, ref column)) => { &Column(ref qa) => {
out.push_identifier(table.as_str())?; qualified_alias_push_sql(out, qa)
out.push_sql(".");
push_column(out, column)
}, },
&Entid(entid) => { &Entid(entid) => {
out.push_sql(entid.to_string().as_str()); out.push_sql(entid.to_string().as_str());
@ -277,6 +301,23 @@ impl QueryFragment for ColumnOrExpression {
&Value(ref v) => { &Value(ref v) => {
out.push_typed_value(v) out.push_typed_value(v)
}, },
&Expression(ref e, _) => {
e.push_sql(out)
},
}
}
}
impl QueryFragment for Expression {
fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult {
match self {
&Expression::Unary { ref sql_op, ref arg } => {
out.push_sql(sql_op); // No need to escape built-ins.
out.push_sql("(");
arg.push_sql(out)?;
out.push_sql(")");
Ok(())
},
} }
} }
} }
@ -379,6 +420,13 @@ impl QueryFragment for JoinOp {
} }
} }
// We don't own QualifiedAlias or QueryFragment, so we can't implement the trait.
fn qualified_alias_push_sql(out: &mut QueryBuilder, qa: &QualifiedAlias) -> BuildQueryResult {
out.push_identifier(qa.0.as_str())?;
out.push_sql(".");
push_column(out, &qa.1)
}
// We don't own SourceAlias or QueryFragment, so we can't implement the trait. // We don't own SourceAlias or QueryFragment, so we can't implement the trait.
fn source_alias_push_sql(out: &mut QueryBuilder, sa: &SourceAlias) -> BuildQueryResult { fn source_alias_push_sql(out: &mut QueryBuilder, sa: &SourceAlias) -> BuildQueryResult {
let &SourceAlias(ref table, ref alias) = sa; let &SourceAlias(ref table, ref alias) = sa;
@ -527,6 +575,16 @@ impl QueryFragment for SelectQuery {
{ out.push_sql(" AND ") }); { out.push_sql(" AND ") });
} }
match &self.group_by {
group_by if !group_by.is_empty() => {
out.push_sql(" GROUP BY ");
interpose!(group, group_by,
{ group.push_sql(out)? },
{ out.push_sql(", ") });
},
_ => {},
}
if !self.order.is_empty() { if !self.order.is_empty() {
out.push_sql(" ORDER BY "); out.push_sql(" ORDER BY ");
interpose!(&OrderBy(ref dir, ref var), self.order, interpose!(&OrderBy(ref dir, ref var), self.order,
@ -727,6 +785,7 @@ mod tests {
right: ColumnOrExpression::Entid(65536), right: ColumnOrExpression::Entid(65536),
}, },
], ],
group_by: vec![],
order: vec![], order: vec![],
limit: Limit::None, limit: Limit::None,
}; };

View file

@ -15,7 +15,9 @@ use mentat_core::{
ValueType, ValueType,
}; };
use mentat_query::Limit; use mentat_query::{
Limit,
};
use mentat_query_algebrizer::{ use mentat_query_algebrizer::{
AlgebraicQuery, AlgebraicQuery,
@ -47,6 +49,7 @@ use mentat_query_sql::{
ColumnOrExpression, ColumnOrExpression,
Constraint, Constraint,
FromClause, FromClause,
GroupBy,
Op, Op,
ProjectedColumn, ProjectedColumn,
Projection, Projection,
@ -214,7 +217,8 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
// project it as the variable name. // project it as the variable name.
// E.g., SELECT datoms03.v AS `?x`. // E.g., SELECT datoms03.v AS `?x`.
for var in projection.iter() { for var in projection.iter() {
let (projected_column, maybe_type) = projected_column_for_var(var, &cc); // TODO: chain results out.
let (projected_column, type_set) = projected_column_for_var(var, &cc).expect("every var to be bound");
columns.push(projected_column); columns.push(projected_column);
// Similarly, project type tags if they're not known conclusively in the // Similarly, project type tags if they're not known conclusively in the
@ -222,10 +226,10 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
// Assumption: we'll never need to project a tag without projecting the value of a variable. // Assumption: we'll never need to project a tag without projecting the value of a variable.
if type_extraction.contains(var) { if type_extraction.contains(var) {
let expression = let expression =
if let Some(ty) = maybe_type { if let Some(tag) = type_set.unique_type_tag() {
// If we know the type for sure, just project the constant. // If we know the type for sure, just project the constant.
// SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag` // SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag`
ColumnOrExpression::Integer(ty.value_type_tag()) ColumnOrExpression::Integer(tag)
} else { } else {
// Otherwise, we'll have an established type binding! This'll be // Otherwise, we'll have an established type binding! This'll be
// either a datoms table or, recursively, a subquery. Project // either a datoms table or, recursively, a subquery. Project
@ -246,7 +250,7 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
// Each arm simply turns into a subquery. // Each arm simply turns into a subquery.
// The SQL translation will stuff "UNION" between each arm. // The SQL translation will stuff "UNION" between each arm.
let projection = Projection::Columns(columns); let projection = Projection::Columns(columns);
cc_to_select_query(projection, cc, false, None, Limit::None) cc_to_select_query(projection, cc, false, vec![], None, Limit::None)
}).collect(), }).collect(),
alias) alias)
}, },
@ -268,6 +272,7 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
fn cc_to_select_query(projection: Projection, fn cc_to_select_query(projection: Projection,
cc: ConjoiningClauses, cc: ConjoiningClauses,
distinct: bool, distinct: bool,
group_by: Vec<GroupBy>,
order: Option<Vec<OrderBy>>, order: Option<Vec<OrderBy>>,
limit: Limit) -> SelectQuery { limit: Limit) -> SelectQuery {
let from = if cc.from.is_empty() { let from = if cc.from.is_empty() {
@ -303,6 +308,7 @@ fn cc_to_select_query(projection: Projection,
distinct: distinct, distinct: distinct,
projection: projection, projection: projection,
from: from, from: from,
group_by: group_by,
constraints: cc.wheres constraints: cc.wheres
.into_iter() .into_iter()
.map(|c| c.to_constraint()) .map(|c| c.to_constraint())
@ -321,12 +327,13 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery {
distinct: false, distinct: false,
projection: Projection::One, projection: Projection::One,
from: FromClause::Nothing, from: FromClause::Nothing,
group_by: vec![],
constraints: vec![], constraints: vec![],
order: vec![], order: vec![],
limit: Limit::None, limit: Limit::None,
} }
} else { } else {
cc_to_select_query(Projection::One, cc, false, None, Limit::None) cc_to_select_query(Projection::One, cc, false, vec![], None, Limit::None)
} }
} }
@ -335,9 +342,9 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery {
pub fn query_to_select(query: AlgebraicQuery) -> Result<ProjectedSelect> { pub fn query_to_select(query: AlgebraicQuery) -> Result<ProjectedSelect> {
// TODO: we can't pass `query.limit` here if we aggregate during projection. // TODO: we can't pass `query.limit` here if we aggregate during projection.
// SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine. // SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine.
let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query)?; let CombinedProjection { sql_projection, datalog_projector, distinct, group_by_cols } = query_projection(&query)?;
Ok(ProjectedSelect { Ok(ProjectedSelect {
query: cc_to_select_query(sql_projection, query.cc, distinct, query.order, query.limit), query: cc_to_select_query(sql_projection, query.cc, distinct, group_by_cols, query.order, query.limit),
projector: datalog_projector, projector: datalog_projector,
}) })
} }

View file

@ -191,7 +191,7 @@ fn test_bound_variable_limit_affects_types() {
assert_eq!(Some(ValueType::Long), assert_eq!(Some(ValueType::Long),
algebrized.cc.known_type(&Variable::from_valid_name("?limit"))); algebrized.cc.known_type(&Variable::from_valid_name("?limit")));
let select = query_to_select(algebrized).expect("query to translate"); let select = query_to_select(algebrized).expect("query to successfully translate");
let SQLQuery { sql, args } = select.query.to_sql_query().unwrap(); let SQLQuery { sql, args } = select.query.to_sql_query().unwrap();
// TODO: this query isn't actually correct -- we don't yet algebrize for variables that are // TODO: this query isn't actually correct -- we don't yet algebrize for variables that are
@ -546,13 +546,13 @@ fn test_with_without_aggregate() {
// Known type. // Known type.
let query = r#"[:find ?x :with ?y :where [?x :foo/bar ?y]]"#; let query = r#"[:find ?x :with ?y :where [?x :foo/bar ?y]]"#;
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x`, `datoms00`.v AS `?y` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
// Unknown type. // Unknown type.
let query = r#"[:find ?x :with ?y :where [?x _ ?y]]"#; let query = r#"[:find ?x :with ?y :where [?x _ ?y]]"#;
let SQLQuery { sql, args } = translate(&schema, query); let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, `all_datoms00`.value_type_tag AS `?y_value_type_tag` FROM `all_datoms` AS `all_datoms00`"); assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00`");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -754,6 +754,55 @@ fn test_unbound_attribute_with_ground() {
`all_datoms00`.value_type_tag = 5)"); `all_datoms00`.value_type_tag = 5)");
} }
/*
#[test]
fn test_colliding_values_unbound_types() {
let schema = prepopulated_schema();
let query = r#"[:find ?x ?a ?y
:where
[?x _ ?y]
[?a _ ?y]]"#;
let SQLQuery { sql, .. } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, \
`all_datoms01`.e AS `?a`, \
`all_datoms00`.v AS `?y`, \
`all_datoms00`.value_type_tag AS `?y_value_type_tag` \
FROM `all_datoms` AS `all_datoms00`, `all_datoms` AS `all_datoms01` \
WHERE `all_datoms00`.v = `all_datoms01`.v \
AND `all_datoms00`.value_type_tag = `all_datoms01`.value_type_tag");
}
#[test]
fn test_restricted_types() {
let schema = prepopulated_schema();
let query = r#"[:find ?x ?y
:where
[?x _ ?y]
[(< ?y 10)]]"#;
let SQLQuery { sql, .. } = translate(&schema, query);
// #385: use `datoms` instead of `all_datoms`.
// No need to project the type code -- long and double are distinguishable.
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, \
`all_datoms00`.v AS `?y` \
FROM `all_datoms` AS `all_datoms00` \
WHERE `all_datoms00`.v < 10 \
AND `all_datoms00`.value_type_tag = 5");
}
#[test]
fn test_known_type_range() {
let schema = prepopulated_schema();
let query = r#"[:find ?x ?y :where [?x :foo/bar] [?y _ ?x]]"#;
let SQLQuery { sql, .. } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x`, \
`datoms00`.e AS `?y` \
FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \
WHERE `datoms00`.a = 66 \
AND `datoms01`.v = `datoms00`.e \
AND `datoms01`.value_type_tag = 1");
}
*/
#[test] #[test]
fn test_not_with_ground() { fn test_not_with_ground() {
@ -955,3 +1004,30 @@ fn test_instant_range() {
AND `datoms00`.v > 1497574601257000"); AND `datoms00`.v > 1497574601257000");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
/// Checks the SQL generated for a simple `max` aggregate, both when the grouping
/// variable is projected and when it is only named via `:with`.
#[test]
fn test_project_aggregates() {
let schema = prepopulated_typed_schema(ValueType::Long);
// `?e` appears in `:find` next to the aggregate, so it is both projected and grouped on.
let query = r#"[:find ?e (max ?t)
:where
[?e :foo/bar ?t]]"#;
let SQLQuery { sql, args } = translate(&schema, query);
// The aggregate column is aliased with the text of the aggregate expression.
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?e`, max(`datoms00`.v) AS `(max ?t)` \
FROM \
`datoms` AS `datoms00` \
WHERE `datoms00`.a = 99 \
GROUP BY `datoms00`.e");
assert_eq!(args, vec![]);
// With `:with ?e`, `?e` is not projected but still appears in the GROUP BY clause.
let query = r#"[:find (max ?t)
:with ?e
:where
[?e :foo/bar ?t]]"#;
let SQLQuery { sql, args } = translate(&schema, query);
assert_eq!(sql, "SELECT DISTINCT max(`datoms00`.v) AS `(max ?t)` \
FROM \
`datoms` AS `datoms00` \
WHERE `datoms00`.a = 99 \
GROUP BY `datoms00`.e");
assert_eq!(args, vec![]);
}

View file

@ -428,17 +428,16 @@ pub struct Pull {
} }
*/ */
/* #[derive(Debug, Eq, PartialEq)]
pub struct Aggregate { pub struct Aggregate {
pub fn_name: String, pub func: QueryFunction,
pub args: Vec<FnArg>, pub args: Vec<FnArg>,
} }
*/
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
pub enum Element { pub enum Element {
Variable(Variable), Variable(Variable),
// Aggregate(Aggregate), // TODO Aggregate(Aggregate),
// Pull(Pull), // TODO // Pull(Pull), // TODO
} }

View file

@ -84,6 +84,7 @@ pub fn q_once<'sqlite, 'schema, 'query, T>
let select = query_to_select(algebrized)?; let select = query_to_select(algebrized)?;
let SQLQuery { sql, args } = select.query.to_sql_query()?; let SQLQuery { sql, args } = select.query.to_sql_query()?;
println!("SQL: {:?}", sql);
let mut statement = sqlite.prepare(sql.as_str())?; let mut statement = sqlite.prepare(sql.as_str())?;
let rows = if args.is_empty() { let rows = if args.is_empty() {

View file

@ -361,7 +361,8 @@ fn test_fulltext() {
_ => panic!("Unexpected results."), _ => panic!("Unexpected results."),
} }
}, },
_ => panic!("Expected query to work."), Result::Ok(r) => panic!("Unexpected results {:?}.", r),
Result::Err(e) => panic!("Expected query to work, got {:?}.", e),
} }
let a = conn.transact(&mut c, r#"[[:db/add "a" :foo/term "talk"]]"#) let a = conn.transact(&mut c, r#"[[:db/add "a" :foo/term "talk"]]"#)
@ -454,3 +455,303 @@ fn test_instant_range_query() {
_ => panic!("Expected query to work."), _ => panic!("Expected query to work."),
} }
} }
/// End-to-end check that non-aggregate `:find` variables act as grouping keys:
/// several aggregates over `?score` are computed per `(?x, ?name)` pair.
#[test]
fn test_aggregation_implicit_grouping() {
let mut c = new_connection("").expect("Couldn't open conn.");
let mut conn = Conn::connect(&mut c).expect("Couldn't open DB.");
// Schema: `:foo/score` (long), `:foo/name` (string), `:foo/is-vegetarian` (boolean),
// and `:foo/play`, a cardinality-many, unique-value ref.
conn.transact(&mut c, r#"[
[:db/add "a" :db/ident :foo/score]
[:db/add "a" :db/valueType :db.type/long]
[:db/add "a" :db/cardinality :db.cardinality/one]
[:db/add "b" :db/ident :foo/name]
[:db/add "b" :db/valueType :db.type/string]
[:db/add "b" :db/cardinality :db.cardinality/one]
[:db/add "c" :db/ident :foo/is-vegetarian]
[:db/add "c" :db/valueType :db.type/boolean]
[:db/add "c" :db/cardinality :db.cardinality/one]
[:db/add "d" :db/ident :foo/play]
[:db/add "d" :db/valueType :db.type/ref]
[:db/add "d" :db/cardinality :db.cardinality/many]
[:db/add "d" :db/unique :db.unique/value]
]"#).unwrap();
// Data: four named people ("a".."d"), eight scored games ("aa", "ab", …), and
// `:foo/play` links from each person to their games. The tempid map is kept so the
// expected results below can refer to the people's entids.
let ids = conn.transact(&mut c, r#"[
[:db/add "a" :foo/name "Alice"]
[:db/add "b" :foo/name "Beli"]
[:db/add "c" :foo/name "Carlos"]
[:db/add "d" :foo/name "Diana"]
[:db/add "a" :foo/is-vegetarian true]
[:db/add "b" :foo/is-vegetarian true]
[:db/add "c" :foo/is-vegetarian false]
[:db/add "d" :foo/is-vegetarian false]
[:db/add "aa" :foo/score 14]
[:db/add "ab" :foo/score 99]
[:db/add "ac" :foo/score 14]
[:db/add "ba" :foo/score 22]
[:db/add "bb" :foo/score 11]
[:db/add "ca" :foo/score 42]
[:db/add "da" :foo/score 5]
[:db/add "db" :foo/score 28]
[:db/add "d" :foo/play "da"]
[:db/add "d" :foo/play "db"]
[:db/add "a" :foo/play "aa"]
[:db/add "a" :foo/play "ab"]
[:db/add "a" :foo/play "ac"]
[:db/add "b" :foo/play "ba"]
[:db/add "b" :foo/play "bb"]
[:db/add "c" :foo/play "ca"]
]"#).unwrap().tempids;
// We can combine these aggregates.
let r = conn.q_once(&mut c,
r#"[:find ?x ?name (max ?score) (count ?score) (avg ?score)
:where
[?x :foo/name ?name]
[?x :foo/play ?game]
[?game :foo/score ?score]
]"#, None);
match r {
Result::Ok(QueryResults::Rel(vals)) => {
// One row per person: [entid, name, max score, number of scores, average score].
// NOTE(review): the query has no `:order`, so this comparison relies on the rows
// coming back in the order listed here — confirm that ordering is guaranteed.
assert_eq!(vals,
vec![
// Alice: scores 14, 99, 14.
vec![TypedValue::Ref(ids.get("a").cloned().unwrap()),
TypedValue::String("Alice".to_string().into()),
TypedValue::Long(99),
TypedValue::Long(3),
TypedValue::Double((127f64 / 3f64).into())],
// Beli: scores 22, 11.
vec![TypedValue::Ref(ids.get("b").cloned().unwrap()),
TypedValue::String("Beli".to_string().into()),
TypedValue::Long(22),
TypedValue::Long(2),
TypedValue::Double((33f64 / 2f64).into())],
// Carlos: score 42.
vec![TypedValue::Ref(ids.get("c").cloned().unwrap()),
TypedValue::String("Carlos".to_string().into()),
TypedValue::Long(42),
TypedValue::Long(1),
TypedValue::Double(42f64.into())],
// Diana: scores 5, 28.
vec![TypedValue::Ref(ids.get("d").cloned().unwrap()),
TypedValue::String("Diana".to_string().into()),
TypedValue::Long(28),
TypedValue::Long(2),
TypedValue::Double((33f64 / 2f64).into())]]);
},
Result::Ok(x) => panic!("Got unexpected results {:?}", x),
Result::Err(e) => panic!("Expected query to work: got {:?}", e),
}
}
// TODO: this can't be phrased in Datalog!
/*
#[test]
fn test_corresponding_row_value_aggregation() {
// Who's youngest, via min?
let r = conn.q_once(&mut c,
r#"[:find [?name (min ?age)]
:where
[?x :foo/age ?age]
[?x :foo/name ?name]]"#, None);
match r {
Result::Ok(QueryResults::Tuple(Some(vals))) => {
assert_eq!(vals,
vec![TypedValue::String("Alice".to_string().into()),
TypedValue::Long(14)]);
},
_ => panic!("Expected query to work."),
}
// Who's oldest, via max?
let r = conn.q_once(&mut c,
r#"[:find [?name (max ?age)]
:where
[?x :foo/age ?age]
[?x :foo/name ?name]]"#, None);
match r {
Result::Ok(QueryResults::Tuple(Some(vals))) => {
assert_eq!(vals,
vec![TypedValue::String("Carlos".to_string().into()),
TypedValue::Long(42)]);
},
_ => panic!("Expected query to work."),
}
}
*/
/// End-to-end checks of `count`, `min`, `max`, `avg` and `sum` over a small
/// data set of four people, with and without `:with`, constraints and grouping.
///
/// Every failure arm reports the unexpected result or the error it received, so
/// a failing query can be diagnosed from the panic message alone.
#[test]
fn test_simple_aggregation() {
    let mut c = new_connection("").expect("Couldn't open conn.");
    let mut conn = Conn::connect(&mut c).expect("Couldn't open DB.");

    // Schema: `:foo/age` (long), `:foo/name` (string), `:foo/is-vegetarian` (boolean).
    conn.transact(&mut c, r#"[
[:db/add "a" :db/ident :foo/age]
[:db/add "a" :db/valueType :db.type/long]
[:db/add "a" :db/cardinality :db.cardinality/one]
[:db/add "b" :db/ident :foo/name]
[:db/add "b" :db/valueType :db.type/string]
[:db/add "b" :db/cardinality :db.cardinality/one]
[:db/add "c" :db/ident :foo/is-vegetarian]
[:db/add "c" :db/valueType :db.type/boolean]
[:db/add "c" :db/cardinality :db.cardinality/one]
]"#).unwrap();

    // Data: four people with a name, an age and a vegetarian flag. The tempid map
    // in the transaction report is not needed by any assertion here, so it is not bound.
    conn.transact(&mut c, r#"[
[:db/add "a" :foo/name "Alice"]
[:db/add "b" :foo/name "Beli"]
[:db/add "c" :foo/name "Carlos"]
[:db/add "d" :foo/name "Diana"]
[:db/add "a" :foo/is-vegetarian true]
[:db/add "b" :foo/is-vegetarian true]
[:db/add "c" :foo/is-vegetarian false]
[:db/add "d" :foo/is-vegetarian false]
[:db/add "a" :foo/age 14]
[:db/add "b" :foo/age 22]
[:db/add "c" :foo/age 42]
[:db/add "d" :foo/age 28]
]"#).unwrap();

    // Count how many vegetarians there are. This is not the same as `count-distinct`.
    // Note the distinction between including `:with` and not.
    let r = conn.q_once(&mut c,
                        r#"[:find (count ?veg)
:where
[_ :foo/is-vegetarian ?veg]
[(ground true) ?veg]]"#, None);
    match r {
        Result::Ok(QueryResults::Rel(vals)) => {
            assert_eq!(vals, vec![vec![TypedValue::Long(1)]]);
        },
        Result::Ok(r) => panic!("Expected query to work, got {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    let r = conn.q_once(&mut c,
                        r#"[:find (count ?veg) .
:with ?person
:where
[?person :foo/is-vegetarian ?veg]
[(ground true) ?veg]]"#, None);
    match r {
        Result::Ok(QueryResults::Scalar(Some(val))) => {
            assert_eq!(val, TypedValue::Long(2));
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // What are the oldest and youngest ages?
    let r = conn.q_once(&mut c,
                        r#"[:find [(min ?age) (max ?age)]
:where
[_ :foo/age ?age]]"#, None);
    match r {
        Result::Ok(QueryResults::Tuple(Some(vals))) => {
            assert_eq!(vals,
                       vec![TypedValue::Long(14),
                            TypedValue::Long(42)]);
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // Who's youngest, via order?
    let r = conn.q_once(&mut c,
                        r#"[:find [?name ?age]
:order (asc ?age)
:where
[?x :foo/age ?age]
[?x :foo/name ?name]]"#, None);
    match r {
        Result::Ok(QueryResults::Tuple(Some(vals))) => {
            assert_eq!(vals,
                       vec![TypedValue::String("Alice".to_string().into()),
                            TypedValue::Long(14)]);
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // Who's oldest, via order?
    let r = conn.q_once(&mut c,
                        r#"[:find [?name ?age]
:order (desc ?age)
:where
[?x :foo/age ?age]
[?x :foo/name ?name]]"#, None);
    match r {
        Result::Ok(QueryResults::Tuple(Some(vals))) => {
            assert_eq!(vals,
                       vec![TypedValue::String("Carlos".to_string().into()),
                            TypedValue::Long(42)]);
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // What's the average age?
    let r = conn.q_once(&mut c,
                        r#"[:find (avg ?age) .
:where
[_ :foo/age ?age]]"#, None);
    match r {
        Result::Ok(QueryResults::Scalar(Some(avg))) => {
            assert_eq!(avg, TypedValue::Double(26.5f64.into()));
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // What's the total age?
    let r = conn.q_once(&mut c,
                        r#"[:find (sum ?age) .
:where
[_ :foo/age ?age]]"#, None);
    match r {
        Result::Ok(QueryResults::Scalar(Some(sum))) => {
            assert_eq!(sum, TypedValue::Long(106));
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // How many distinct names are there?
    let r = conn.q_once(&mut c,
                        r#"[:find (count ?name) .
:where
[_ :foo/name ?name]]"#, None);
    match r {
        Result::Ok(QueryResults::Scalar(Some(count))) => {
            assert_eq!(count, TypedValue::Long(4));
        },
        Result::Ok(r) => panic!("Unexpected results {:?}", r),
        Result::Err(e) => panic!("Expected query to work, got {:?}", e),
    }

    // We can use constraints, too.
    // What's the average age of adults?
    let r = conn.q_once(&mut c,
                        r#"[:find [(avg ?age) (count ?age)]
:where
[_ :foo/age ?age]
[(>= ?age 18)]]"#, None);
    match r {
        Result::Ok(QueryResults::Tuple(Some(vals))) => {
            assert_eq!(vals, vec![TypedValue::Double((92f64 / 3f64).into()),
                                  TypedValue::Long(3)]);
        },
        Result::Ok(x) => panic!("Got unexpected results {:?}", x),
        Result::Err(e) => panic!("Expected query to work: got {:?}", e),
    }

    // Who's oldest, vegetarians or not?
    let r = conn.q_once(&mut c,
                        r#"[:find ?veg (max ?age)
:where
[?p :foo/age ?age]
[?p :foo/is-vegetarian ?veg]]"#, None);
    match r {
        Result::Ok(QueryResults::Rel(vals)) => {
            assert_eq!(vals, vec![
                vec![TypedValue::Boolean(false), TypedValue::Long(42)],
                vec![TypedValue::Boolean(true), TypedValue::Long(22)],
            ]);
        },
        Result::Ok(x) => panic!("Got unexpected results {:?}", x),
        Result::Err(e) => panic!("Expected query to work: got {:?}", e),
    }
}