Simple aggregates. (#584) r=emily
* Pre: use debugcli in VSCode. * Pre: wrap subqueries in parentheses in output SQL. * Pre: add ExistingColumn. This lets us make reference to columns by name, rather than only pointing to qualified aliases. * Pre: add Into for &str to TypedValue. * Pre: add Store.transact. * Pre: cleanup. * Parse and algebrize simple aggregates. (#312) * Follow-up: print aggregate columns more neatly in the CLI. * Useful ValueTypeSet helpers. * Allow for entity inequalities. * Add 'differ', which is a ref-specialized not-equals. * Add 'unpermute', a function for getting unique, distinct pairs from bindings. * Review comments. * Add 'the' pseudo-aggregation operator. This allows for a corresponding value to be returned when a query includes one 'min' or 'max' aggregate.
This commit is contained in:
parent
46835885e4
commit
833ff92436
20 changed files with 1676 additions and 141 deletions
2
.vscode/tasks.json
vendored
2
.vscode/tasks.json
vendored
|
@ -21,7 +21,7 @@
|
|||
"label": "Run CLI",
|
||||
"command": "cargo",
|
||||
"args": [
|
||||
"cli",
|
||||
"debugcli",
|
||||
],
|
||||
"problemMatcher": [
|
||||
"$rustc"
|
||||
|
|
|
@ -293,6 +293,12 @@ impl From<Uuid> for TypedValue {
|
|||
}
|
||||
}
|
||||
|
||||
/// Build a `TypedValue::String` from a borrowed string slice.
///
/// The slice is copied into an owned `String` (via `to_string`), which is then wrapped in an `Rc`.
impl<'a> From<&'a str> for TypedValue {
|
||||
fn from(value: &'a str) -> TypedValue {
|
||||
TypedValue::String(Rc::new(value.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for TypedValue {
|
||||
fn from(value: String) -> TypedValue {
|
||||
TypedValue::String(Rc::new(value))
|
||||
|
@ -449,6 +455,15 @@ impl ValueTypeSet {
|
|||
ValueTypeSet(EnumSet::of_both(ValueType::Double, ValueType::Long))
|
||||
}
|
||||
|
||||
/// Return a set containing `Double`, `Long`, and `Instant`.
///
/// NOTE(review): presumably the same members as `of_numeric_types()` plus `Instant` — confirm
/// against that constructor.
|
||||
pub fn of_numeric_and_instant_types() -> ValueTypeSet {
|
||||
// Three members are needed, so the set is built by inserting each one in turn.
let mut s = EnumSet::new();
|
||||
s.insert(ValueType::Double);
|
||||
s.insert(ValueType::Long);
|
||||
s.insert(ValueType::Instant);
|
||||
ValueTypeSet(s)
|
||||
}
|
||||
|
||||
/// Return a set containing `Ref` and `Keyword`.
|
||||
pub fn of_keywords() -> ValueTypeSet {
|
||||
ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Keyword))
|
||||
|
@ -516,6 +531,18 @@ impl ValueTypeSet {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert a single `ValueType` into a `ValueTypeSet` by delegating to `ValueTypeSet::of_one`.
///
/// This is what allows call sites to write `ValueType::Ref.into()` where a set is expected.
impl From<ValueType> for ValueTypeSet {
|
||||
fn from(t: ValueType) -> Self {
|
||||
ValueTypeSet::of_one(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueTypeSet {
|
||||
/// Return `true` if this set is a subset of `ValueTypeSet::of_numeric_types()`.
///
/// NOTE(review): this is a plain subset test, so an empty set presumably also returns `true`
/// (confirm `is_subset` semantics). Callers that need a non-empty numeric set should check
/// `is_empty()` separately.
pub fn is_only_numeric(&self) -> bool {
|
||||
self.is_subset(&ValueTypeSet::of_numeric_types())
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for ValueTypeSet {
|
||||
type Item = ValueType;
|
||||
type IntoIter = ::enum_set::Iter<ValueType>;
|
||||
|
@ -541,10 +568,16 @@ impl ::std::iter::Extend<ValueType> for ValueTypeSet {
|
|||
}
|
||||
}
|
||||
|
||||
/// We have an enum of types, `ValueType`. It can be collected into a set, `ValueTypeSet`. Each type
|
||||
/// is associated with a type tag, which is how a type is represented in, e.g., SQL storage. Types
|
||||
/// can share type tags, because backing SQL storage is able to differentiate between some types
|
||||
/// (e.g., longs and doubles), and so distinct tags aren't necessary. That association is defined by
|
||||
/// `SQLValueType`. That trait similarly extends to `ValueTypeSet`, which maps a collection of types
|
||||
/// into a collection of tags.
|
||||
pub trait SQLValueTypeSet {
|
||||
fn value_type_tags(&self) -> BTreeSet<ValueTypeTag>;
|
||||
fn has_unique_type_code(&self) -> bool;
|
||||
fn unique_type_code(&self) -> Option<ValueTypeTag>;
|
||||
fn has_unique_type_tag(&self) -> bool;
|
||||
fn unique_type_tag(&self) -> Option<ValueTypeTag>;
|
||||
}
|
||||
|
||||
impl SQLValueTypeSet for ValueTypeSet {
|
||||
|
@ -557,15 +590,15 @@ impl SQLValueTypeSet for ValueTypeSet {
|
|||
out
|
||||
}
|
||||
|
||||
fn unique_type_code(&self) -> Option<ValueTypeTag> {
|
||||
if self.is_unit() || self.has_unique_type_code() {
|
||||
fn unique_type_tag(&self) -> Option<ValueTypeTag> {
|
||||
if self.is_unit() || self.has_unique_type_tag() {
|
||||
self.exemplar().map(|t| t.value_type_tag())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn has_unique_type_code(&self) -> bool {
|
||||
fn has_unique_type_tag(&self) -> bool {
|
||||
if self.is_unit() {
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ impl ConjoiningClauses {
|
|||
// TODO: process source variables.
|
||||
match args.next().unwrap() {
|
||||
FnArg::SrcVar(SrcVar::DefaultSrc) => {},
|
||||
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)),
|
||||
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable", 0)),
|
||||
}
|
||||
|
||||
let schema = known.schema;
|
||||
|
@ -127,8 +127,11 @@ impl ConjoiningClauses {
|
|||
// An unknown ident, or an entity that isn't present in the store, or isn't a fulltext
|
||||
// attribute, is likely enough to be a coding error that we choose to bail instead of
|
||||
// marking the pattern as known-empty.
|
||||
let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?;
|
||||
let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?;
|
||||
let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute", 1))?;
|
||||
let attribute = schema.attribute_for_entid(a)
|
||||
.cloned()
|
||||
.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(),
|
||||
"attribute", 1))?;
|
||||
|
||||
if !attribute.fulltext {
|
||||
// We can never get results from a non-fulltext attribute!
|
||||
|
@ -166,12 +169,12 @@ impl ConjoiningClauses {
|
|||
FnArg::Variable(in_var) => {
|
||||
match self.bound_value(&in_var) {
|
||||
Some(t @ TypedValue::String(_)) => Either::Left(t),
|
||||
Some(_) => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)),
|
||||
Some(_) => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string", 2)),
|
||||
None => {
|
||||
// Regardless of whether we'll be providing a string later, or the value
|
||||
// comes from a column, it must be a string.
|
||||
if self.known_type(&in_var) != Some(ValueType::String) {
|
||||
bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2));
|
||||
bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string", 2));
|
||||
}
|
||||
|
||||
if self.input_variables.contains(&in_var) {
|
||||
|
@ -192,7 +195,7 @@ impl ConjoiningClauses {
|
|||
},
|
||||
}
|
||||
},
|
||||
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)),
|
||||
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string", 2)),
|
||||
};
|
||||
|
||||
let qv = match search {
|
||||
|
|
|
@ -286,6 +286,18 @@ impl Default for ConjoiningClauses {
|
|||
}
|
||||
}
|
||||
|
||||
/// An iterator over borrowed `Variable`s.
///
/// This is a newtype wrapper around the `Keys` iterator of a `BTreeMap<Variable, TypedValue>`;
/// it yields exactly what the wrapped iterator yields.
pub struct VariableIterator<'a>(
|
||||
::std::collections::btree_map::Keys<'a, Variable, TypedValue>,
|
||||
);
|
||||
|
||||
impl<'a> Iterator for VariableIterator<'a> {
|
||||
type Item = &'a Variable;
|
||||
|
||||
/// Advance the wrapped `Keys` iterator and return its next key, if any.
fn next(&mut self) -> Option<&'a Variable> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
impl ConjoiningClauses {
|
||||
/// Construct a new `ConjoiningClauses` with the provided alias counter. This allows a caller
|
||||
/// to share a counter with an enclosing scope, and to start counting at a particular offset
|
||||
|
@ -390,7 +402,7 @@ impl ConjoiningClauses {
|
|||
self.value_bindings.get(var).cloned()
|
||||
}
|
||||
|
||||
pub(crate) fn is_value_bound(&self, var: &Variable) -> bool {
|
||||
pub fn is_value_bound(&self, var: &Variable) -> bool {
|
||||
self.value_bindings.contains_key(var)
|
||||
}
|
||||
|
||||
|
@ -398,9 +410,14 @@ impl ConjoiningClauses {
|
|||
self.value_bindings.with_intersected_keys(variables)
|
||||
}
|
||||
|
||||
/// Return an iterator over the variables externally bound to values.
///
/// The iterator borrows from `self.value_bindings` and yields a `&Variable` for each of its
/// keys; no variables are cloned.
|
||||
pub fn value_bound_variables(&self) -> VariableIterator {
|
||||
VariableIterator(self.value_bindings.keys())
|
||||
}
|
||||
|
||||
/// Return a set of the variables externally bound to values.
|
||||
pub(crate) fn value_bound_variable_set(&self) -> BTreeSet<Variable> {
|
||||
self.value_bindings.keys().cloned().collect()
|
||||
pub fn value_bound_variable_set(&self) -> BTreeSet<Variable> {
|
||||
self.value_bound_variables().cloned().collect()
|
||||
}
|
||||
|
||||
/// Return a single `ValueType` if the given variable is known to have a precise type.
|
||||
|
@ -414,7 +431,7 @@ impl ConjoiningClauses {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn known_type_set(&self, var: &Variable) -> ValueTypeSet {
|
||||
pub fn known_type_set(&self, var: &Variable) -> ValueTypeSet {
|
||||
self.known_types.get(var).cloned().unwrap_or(ValueTypeSet::any())
|
||||
}
|
||||
|
||||
|
|
|
@ -92,13 +92,13 @@ impl ConjoiningClauses {
|
|||
let mut left_types = self.potential_types(known.schema, &left)?
|
||||
.intersection(&supported_types);
|
||||
if left_types.is_empty() {
|
||||
bail!(ErrorKind::InvalidArgument(predicate.operator.clone(), "numeric or instant", 0));
|
||||
bail!(ErrorKind::InvalidArgumentType(predicate.operator.clone(), supported_types, 0));
|
||||
}
|
||||
|
||||
let mut right_types = self.potential_types(known.schema, &right)?
|
||||
.intersection(&supported_types);
|
||||
if right_types.is_empty() {
|
||||
bail!(ErrorKind::InvalidArgument(predicate.operator.clone(), "numeric or instant", 1));
|
||||
bail!(ErrorKind::InvalidArgumentType(predicate.operator.clone(), supported_types, 1));
|
||||
}
|
||||
|
||||
// We would like to allow longs to compare to doubles.
|
||||
|
@ -134,14 +134,18 @@ impl ConjoiningClauses {
|
|||
// We expect the intersection to be Long, Long+Double, Double, or Instant.
|
||||
let left_v;
|
||||
let right_v;
|
||||
|
||||
if shared_types == ValueTypeSet::of_one(ValueType::Instant) {
|
||||
left_v = self.resolve_instant_argument(&predicate.operator, 0, left)?;
|
||||
right_v = self.resolve_instant_argument(&predicate.operator, 1, right)?;
|
||||
} else if !shared_types.is_empty() && shared_types.is_subset(&ValueTypeSet::of_numeric_types()) {
|
||||
} else if shared_types.is_only_numeric() {
|
||||
left_v = self.resolve_numeric_argument(&predicate.operator, 0, left)?;
|
||||
right_v = self.resolve_numeric_argument(&predicate.operator, 1, right)?;
|
||||
} else if shared_types == ValueTypeSet::of_one(ValueType::Ref) {
|
||||
left_v = self.resolve_ref_argument(known.schema, &predicate.operator, 0, left)?;
|
||||
right_v = self.resolve_ref_argument(known.schema, &predicate.operator, 1, right)?;
|
||||
} else {
|
||||
bail!(ErrorKind::InvalidArgument(predicate.operator.clone(), "numeric or instant", 0));
|
||||
bail!(ErrorKind::InvalidArgumentType(predicate.operator.clone(), supported_types, 0));
|
||||
}
|
||||
|
||||
// These arguments must be variables or instant/numeric constants.
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
use mentat_core::{
|
||||
HasSchema,
|
||||
Schema,
|
||||
TypedValue,
|
||||
ValueType,
|
||||
};
|
||||
|
@ -92,11 +94,49 @@ impl ConjoiningClauses {
|
|||
Constant(NonIntegerConstant::BigInteger(_)) |
|
||||
Vector(_) => {
|
||||
self.mark_known_empty(EmptyBecause::NonInstantArgument);
|
||||
bail!(ErrorKind::InvalidArgument(function.clone(), "instant", position));
|
||||
bail!(ErrorKind::InvalidArgumentType(function.clone(), ValueType::Instant.into(), position));
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete
|
||||
/// constraint.
///
/// The argument is expected to denote an entity (`ValueType::Ref`):
///
/// - A variable is constrained to `Ref`. If it is bound to a `TypedValue::Ref`, that entid is
///   returned as `QueryValue::Entid`; otherwise the first column bound to the variable is
///   returned as `QueryValue::Column`, or `ErrorKind::UnboundVariable` if there is none.
/// - An `EntidOrInteger` is returned as a `QueryValue::TypedValue` wrapping `TypedValue::Ref`.
/// - An `IdentOrKeyword` is looked up with `schema.get_entid`; if the lookup fails the result
///   is `ErrorKind::UnrecognizedIdent`.
/// - Any other argument calls `mark_known_empty(EmptyBecause::NonEntityArgument)` and fails
///   with `ErrorKind::InvalidArgumentType`, reporting `function` and `position`.
|
||||
pub(crate) fn resolve_ref_argument(&mut self, schema: &Schema, function: &PlainSymbol, position: usize, arg: FnArg) -> Result<QueryValue> {
|
||||
use self::FnArg::*;
|
||||
match arg {
|
||||
FnArg::Variable(var) => {
|
||||
// Record the type requirement first; the lookups below rely on it to reject
// values of other types.
self.constrain_var_to_type(var.clone(), ValueType::Ref);
|
||||
if let Some(TypedValue::Ref(e)) = self.bound_value(&var) {
|
||||
// Incorrect types will be handled by the constraint, above.
|
||||
Ok(QueryValue::Entid(e))
|
||||
} else {
|
||||
// Not bound to a ref value: fall back to the first column bound to this variable.
self.column_bindings
|
||||
.get(&var)
|
||||
.and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone())))
|
||||
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))
|
||||
}
|
||||
},
|
||||
EntidOrInteger(i) => Ok(QueryValue::TypedValue(TypedValue::Ref(i))),
|
||||
IdentOrKeyword(i) => {
|
||||
schema.get_entid(&i)
|
||||
.map(|known_entid| QueryValue::Entid(known_entid.into()))
|
||||
.ok_or_else(|| Error::from_kind(ErrorKind::UnrecognizedIdent(i.to_string())))
|
||||
},
|
||||
// Every remaining argument kind is rejected as a non-entity argument.
Constant(NonIntegerConstant::Boolean(_)) |
|
||||
Constant(NonIntegerConstant::Float(_)) |
|
||||
Constant(NonIntegerConstant::Text(_)) |
|
||||
Constant(NonIntegerConstant::Uuid(_)) |
|
||||
Constant(NonIntegerConstant::Instant(_)) |
|
||||
Constant(NonIntegerConstant::BigInteger(_)) |
|
||||
SrcVar(_) |
|
||||
Vector(_) => {
|
||||
self.mark_known_empty(EmptyBecause::NonEntityArgument);
|
||||
bail!(ErrorKind::InvalidArgumentType(function.clone(), ValueType::Ref.into(), position));
|
||||
},
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete
|
||||
/// constraint.
|
||||
#[allow(dead_code)]
|
||||
|
|
|
@ -12,6 +12,7 @@ extern crate mentat_query;
|
|||
|
||||
use mentat_core::{
|
||||
ValueType,
|
||||
ValueTypeSet,
|
||||
};
|
||||
|
||||
use self::mentat_query::{
|
||||
|
@ -49,6 +50,11 @@ error_chain! {
|
|||
display("value of type {} provided for var {}, expected {}", provided, var, declared)
|
||||
}
|
||||
|
||||
UnrecognizedIdent(ident: String) {
|
||||
description("no entid found for ident")
|
||||
display("no entid found for ident: {}", ident)
|
||||
}
|
||||
|
||||
UnknownFunction(name: PlainSymbol) {
|
||||
description("no such function")
|
||||
display("no function named {}", name)
|
||||
|
@ -80,9 +86,14 @@ error_chain! {
|
|||
display("invalid expression in ground constant")
|
||||
}
|
||||
|
||||
InvalidArgument(function: PlainSymbol, expected_type: &'static str, position: usize) {
|
||||
InvalidArgument(function: PlainSymbol, expected: &'static str, position: usize) {
|
||||
description("invalid argument")
|
||||
display("invalid argument to {}: expected {} in position {}.", function, expected_type, position)
|
||||
display("invalid argument to {}: expected {} in position {}.", function, expected, position)
|
||||
}
|
||||
|
||||
InvalidArgumentType(function: PlainSymbol, expected_types: ValueTypeSet, position: usize) {
|
||||
description("invalid argument")
|
||||
display("invalid argument to {}: expected one of {:?} in position {}.", function, expected_types, position)
|
||||
}
|
||||
|
||||
InvalidLimit(val: String, kind: ValueType) {
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#![recursion_limit="128"]
|
||||
|
||||
#[macro_use]
|
||||
extern crate error_chain;
|
||||
|
||||
|
@ -130,7 +132,19 @@ pub struct AlgebraicQuery {
|
|||
default_source: SrcVar,
|
||||
pub find_spec: Rc<FindSpec>,
|
||||
has_aggregates: bool,
|
||||
|
||||
/// The set of variables that the caller wishes to be used for grouping when aggregating.
|
||||
/// These are specified in the query input, as `:with`, and are then chewed up during projection.
|
||||
/// If no variables are supplied, then no additional grouping is necessary beyond the
|
||||
/// non-aggregated projection list.
|
||||
pub with: BTreeSet<Variable>,
|
||||
|
||||
/// Some query features, such as ordering, are implemented by implicit reference to SQL columns.
|
||||
/// In order for these references to be 'live', those columns must be projected.
|
||||
/// This is the set of variables that must be so projected.
|
||||
/// This is not necessarily every variable that will be so required -- some variables
|
||||
/// will already be in the projection list.
|
||||
pub named_projection: BTreeSet<Variable>,
|
||||
pub order: Option<Vec<OrderBy>>,
|
||||
pub limit: Limit,
|
||||
pub cc: clauses::ConjoiningClauses,
|
||||
|
@ -147,7 +161,12 @@ impl AlgebraicQuery {
|
|||
self.find_spec
|
||||
.columns()
|
||||
.all(|e| match e {
|
||||
&Element::Variable(ref var) => self.cc.is_value_bound(var),
|
||||
&Element::Variable(ref var) |
|
||||
&Element::Corresponding(ref var) => self.cc.is_value_bound(var),
|
||||
|
||||
// For now, we pretend that aggregate functions are never fully bound:
|
||||
// we don't statically compute them, even if we know the value of the var.
|
||||
&Element::Aggregate(ref _fn) => false,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -270,7 +289,6 @@ pub fn algebrize_with_inputs(known: Known,
|
|||
cc.process_required_types()?;
|
||||
|
||||
let (order, extra_vars) = validate_and_simplify_order(&cc, parsed.order)?;
|
||||
let with: BTreeSet<Variable> = parsed.with.into_iter().chain(extra_vars.into_iter()).collect();
|
||||
|
||||
// This might leave us with an unused `:in` variable.
|
||||
let limit = if parsed.find_spec.is_unit_limited() { Limit::Fixed(1) } else { parsed.limit };
|
||||
|
@ -278,7 +296,8 @@ pub fn algebrize_with_inputs(known: Known,
|
|||
default_source: parsed.default_source,
|
||||
find_spec: Rc::new(parsed.find_spec),
|
||||
has_aggregates: false, // TODO: we don't parse them yet.
|
||||
with: with,
|
||||
with: parsed.with,
|
||||
named_projection: extra_vars,
|
||||
order: order,
|
||||
limit: limit,
|
||||
cc: cc,
|
||||
|
|
|
@ -283,6 +283,10 @@ pub enum Inequality {
|
|||
GreaterThan,
|
||||
GreaterThanOrEquals,
|
||||
NotEquals,
|
||||
|
||||
// Ref operators.
|
||||
Unpermute,
|
||||
Differ,
|
||||
}
|
||||
|
||||
impl Inequality {
|
||||
|
@ -294,6 +298,9 @@ impl Inequality {
|
|||
GreaterThan => ">",
|
||||
GreaterThanOrEquals => ">=",
|
||||
NotEquals => "<>",
|
||||
|
||||
Unpermute => "<",
|
||||
Differ => "<>",
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -304,15 +311,31 @@ impl Inequality {
|
|||
">" => Some(Inequality::GreaterThan),
|
||||
">=" => Some(Inequality::GreaterThanOrEquals),
|
||||
"!=" => Some(Inequality::NotEquals),
|
||||
_ => None,
|
||||
|
||||
"unpermute" => Some(Inequality::Unpermute),
|
||||
"differ" => Some(Inequality::Differ),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// The built-in inequality operators apply to Long, Double, and Instant; the ref operators (`unpermute` and `differ`) apply only to Ref.
|
||||
pub fn supported_types(&self) -> ValueTypeSet {
|
||||
let mut ts = ValueTypeSet::of_numeric_types();
|
||||
ts.insert(ValueType::Instant);
|
||||
ts
|
||||
use self::Inequality::*;
|
||||
match self {
|
||||
&LessThan |
|
||||
&LessThanOrEquals |
|
||||
&GreaterThan |
|
||||
&GreaterThanOrEquals |
|
||||
&NotEquals => {
|
||||
let mut ts = ValueTypeSet::of_numeric_types();
|
||||
ts.insert(ValueType::Instant);
|
||||
ts
|
||||
},
|
||||
&Unpermute |
|
||||
&Differ => {
|
||||
ValueTypeSet::of_one(ValueType::Ref)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -325,6 +348,9 @@ impl Debug for Inequality {
|
|||
&GreaterThan => ">",
|
||||
&GreaterThanOrEquals => ">=",
|
||||
&NotEquals => "!=", // Datalog uses !=. SQL uses <>.
|
||||
|
||||
&Unpermute => "<",
|
||||
&Differ => "<>",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -505,6 +531,7 @@ pub enum EmptyBecause {
|
|||
NonAttributeArgument,
|
||||
NonInstantArgument,
|
||||
NonNumericArgument,
|
||||
NonEntityArgument,
|
||||
NonStringFulltextValue,
|
||||
NonFulltextAttribute(Entid),
|
||||
UnresolvedIdent(NamespacedKeyword),
|
||||
|
@ -546,6 +573,9 @@ impl Debug for EmptyBecause {
|
|||
&NonInstantArgument => {
|
||||
write!(f, "Non-instant argument in instant place")
|
||||
},
|
||||
&NonEntityArgument => {
|
||||
write!(f, "Non-entity argument in entity place")
|
||||
},
|
||||
&NonNumericArgument => {
|
||||
write!(f, "Non-numeric argument in numeric place")
|
||||
},
|
||||
|
|
|
@ -69,9 +69,9 @@ fn test_instant_predicates_require_instants() {
|
|||
[?e :foo/date ?t]
|
||||
[(> ?t "2017-06-16T00:56:41.257Z")]]"#;
|
||||
match bails(known, query).0 {
|
||||
ErrorKind::InvalidArgument(op, why, idx) => {
|
||||
ErrorKind::InvalidArgumentType(op, why, idx) => {
|
||||
assert_eq!(op, PlainSymbol::new(">"));
|
||||
assert_eq!(why, "numeric or instant");
|
||||
assert_eq!(why, ValueTypeSet::of_numeric_and_instant_types());
|
||||
assert_eq!(idx, 1);
|
||||
},
|
||||
_ => panic!("Expected InvalidArgument."),
|
||||
|
@ -82,9 +82,9 @@ fn test_instant_predicates_require_instants() {
|
|||
[?e :foo/date ?t]
|
||||
[(> "2017-06-16T00:56:41.257Z", ?t)]]"#;
|
||||
match bails(known, query).0 {
|
||||
ErrorKind::InvalidArgument(op, why, idx) => {
|
||||
ErrorKind::InvalidArgumentType(op, why, idx) => {
|
||||
assert_eq!(op, PlainSymbol::new(">"));
|
||||
assert_eq!(why, "numeric or instant");
|
||||
assert_eq!(why, ValueTypeSet::of_numeric_and_instant_types());
|
||||
assert_eq!(idx, 0); // We get this right.
|
||||
},
|
||||
_ => panic!("Expected InvalidArgument."),
|
||||
|
|
|
@ -41,6 +41,7 @@ use self::mentat_parser_utils::value_and_span::{
|
|||
};
|
||||
|
||||
use self::mentat_query::{
|
||||
Aggregate,
|
||||
Binding,
|
||||
Direction,
|
||||
Element,
|
||||
|
@ -170,6 +171,8 @@ def_parser!(Query, order, Order, {
|
|||
.or(Query::variable().map(|v| Order(Direction::Ascending, v)))
|
||||
});
|
||||
|
||||
// Defines `Query::the`, a matcher for the plain symbol `the` (the head of a `(the ?var)` element).
def_matches_plain_symbol!(Query, the, "the");
|
||||
|
||||
pub struct Where<'a>(std::marker::PhantomData<&'a ()>);
|
||||
|
||||
def_parser!(Where, pattern_value_place, PatternValuePlace, {
|
||||
|
@ -274,6 +277,13 @@ def_parser!(Query, func, (QueryFunction, Vec<FnArg>), {
|
|||
(Query::query_function(), Query::arguments())
|
||||
});
|
||||
|
||||
// Parse a nested sequence holding a function application -- whatever `Query::func()` accepts --
// into an `Aggregate` carrying that function and its arguments.
// NOTE(review): `of_exactly` presumably requires the sequence to contain nothing beyond the
// function application; confirm against the parser utilities.
def_parser!(Query, aggregate, Aggregate, {
|
||||
seq().of_exactly(Query::func())
|
||||
.map(|(func, args)| Aggregate {
|
||||
func, args,
|
||||
})
|
||||
});
|
||||
|
||||
/// A vector containing just a parenthesized filter expression.
|
||||
def_parser!(Where, pred, WhereClause, {
|
||||
// Accept either a nested list or a nested vector here:
|
||||
|
@ -417,10 +427,25 @@ def_matches_plain_symbol!(Find, ellipsis, "...");
|
|||
|
||||
def_matches_plain_symbol!(Find, placeholder, "_");
|
||||
|
||||
def_parser!(Find, elem, Element, {
|
||||
// Parse a bare variable and wrap it as `Element::Variable`.
def_parser!(Find, variable_element, Element, {
|
||||
Query::variable().map(Element::Variable)
|
||||
});
|
||||
|
||||
// Parse a `(the ?var)` form: the symbol `the` followed by a variable, inside a sequence.
// The `the` symbol itself is discarded and the variable is wrapped as `Element::Corresponding`.
def_parser!(Find, corresponding_element, Element, {
|
||||
seq().of_exactly(Query::the().with(Query::variable()))
|
||||
.map(Element::Corresponding)
|
||||
});
|
||||
|
||||
// Parse an aggregate expression with `Query::aggregate` and wrap it as `Element::Aggregate`.
def_parser!(Find, aggregate_element, Element, {
|
||||
Query::aggregate().map(Element::Aggregate)
|
||||
});
|
||||
|
||||
// Parse a single find element by trying each alternative in order: a bare variable, a
// `(the ?var)` form, then an aggregate. Each alternative is wrapped in `try`.
// NOTE(review): `try` presumably lets a failed alternative backtrack so the next one sees the
// same input; confirm against the parser combinator library.
def_parser!(Find, elem, Element, {
|
||||
choice([try(Find::variable_element()),
|
||||
try(Find::corresponding_element()),
|
||||
try(Find::aggregate_element())])
|
||||
});
|
||||
|
||||
def_parser!(Find, find_scalar, FindSpec, {
|
||||
Find::elem().skip(Find::period())
|
||||
.map(FindSpec::FindScalar)
|
||||
|
@ -955,6 +980,45 @@ mod test {
|
|||
]));
|
||||
}
|
||||
|
||||
// Checks that `(the ?var)` parses to `Element::Corresponding` at each level of the find
// parser: a single element, a tuple find spec, the general find spec, and a whole query.
#[test]
|
||||
fn test_the() {
|
||||
// A lone `(the ?y)` element.
assert_edn_parses_to!(Find::corresponding_element,
|
||||
"(the ?y)",
|
||||
Element::Corresponding(Variable::from_valid_name("?y")));
|
||||
// A tuple find spec mixing `(the ?x)` with a plain variable.
assert_edn_parses_to!(Find::find_tuple,
|
||||
"[(the ?x) ?y]",
|
||||
FindSpec::FindTuple(vec![Element::Corresponding(Variable::from_valid_name("?x")),
|
||||
Element::Variable(Variable::from_valid_name("?y"))]));
|
||||
// The same input through the general find-spec parser.
assert_edn_parses_to!(Find::spec,
|
||||
"[(the ?x) ?y]",
|
||||
FindSpec::FindTuple(vec![Element::Corresponding(Variable::from_valid_name("?x")),
|
||||
Element::Variable(Variable::from_valid_name("?y"))]));
|
||||
// A complete query using the same find spec; every other query field takes its default.
let expected_query =
|
||||
FindQuery {
|
||||
find_spec: FindSpec::FindTuple(vec![Element::Corresponding(Variable::from_valid_name("?x")),
|
||||
Element::Variable(Variable::from_valid_name("?y"))]),
|
||||
where_clauses: vec![
|
||||
WhereClause::Pattern(Pattern {
|
||||
source: None,
|
||||
entity: PatternNonValuePlace::Variable(Variable::from_valid_name("?x")),
|
||||
attribute: PatternNonValuePlace::Placeholder,
|
||||
value: PatternValuePlace::Variable(Variable::from_valid_name("?y")),
|
||||
tx: PatternNonValuePlace::Placeholder,
|
||||
})],
|
||||
|
||||
default_source: SrcVar::DefaultSrc,
|
||||
with: Default::default(),
|
||||
in_vars: Default::default(),
|
||||
in_sources: Default::default(),
|
||||
limit: Limit::None,
|
||||
order: None,
|
||||
};
|
||||
assert_edn_parses_to!(Find::query,
|
||||
"[:find [(the ?x) ?y]
|
||||
:where [?x _ ?y]]",
|
||||
expected_query);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_where_fn() {
|
||||
assert_edn_parses_to!(Where::where_fn,
|
||||
|
|
|
@ -5,6 +5,7 @@ workspace = ".."
|
|||
|
||||
[dependencies]
|
||||
error-chain = { git = "https://github.com/rnewman/error-chain", branch = "rnewman/sync" }
|
||||
indexmap = "0.4"
|
||||
|
||||
[dependencies.rusqlite]
|
||||
version = "0.13"
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#[macro_use]
|
||||
extern crate error_chain;
|
||||
extern crate indexmap;
|
||||
extern crate rusqlite;
|
||||
|
||||
extern crate mentat_core;
|
||||
|
@ -24,8 +25,13 @@ use std::collections::{
|
|||
};
|
||||
|
||||
use std::iter;
|
||||
|
||||
use std::rc::Rc;
|
||||
|
||||
use indexmap::{
|
||||
IndexSet,
|
||||
};
|
||||
|
||||
use rusqlite::{
|
||||
Row,
|
||||
Rows,
|
||||
|
@ -33,8 +39,10 @@ use rusqlite::{
|
|||
|
||||
use mentat_core::{
|
||||
SQLValueType,
|
||||
SQLValueTypeSet,
|
||||
TypedValue,
|
||||
ValueType,
|
||||
ValueTypeSet,
|
||||
ValueTypeTag,
|
||||
};
|
||||
|
||||
|
@ -47,9 +55,12 @@ use mentat_db::{
|
|||
};
|
||||
|
||||
use mentat_query::{
|
||||
Aggregate,
|
||||
Element,
|
||||
FindSpec,
|
||||
Limit,
|
||||
PlainSymbol,
|
||||
QueryFunction,
|
||||
Variable,
|
||||
};
|
||||
|
||||
|
@ -57,12 +68,15 @@ use mentat_query_algebrizer::{
|
|||
AlgebraicQuery,
|
||||
ColumnName,
|
||||
ConjoiningClauses,
|
||||
QualifiedAlias,
|
||||
VariableBindings,
|
||||
VariableColumn,
|
||||
};
|
||||
|
||||
use mentat_query_sql::{
|
||||
ColumnOrExpression,
|
||||
Expression,
|
||||
GroupBy,
|
||||
Name,
|
||||
Projection,
|
||||
ProjectedColumn,
|
||||
|
@ -73,6 +87,39 @@ error_chain! {
|
|||
Error, ErrorKind, ResultExt, Result;
|
||||
}
|
||||
|
||||
errors {
|
||||
/// We're just not done yet. Message that the feature is recognized but not yet
|
||||
/// implemented.
|
||||
NotYetImplemented(t: String) {
|
||||
description("not yet implemented")
|
||||
display("not yet implemented: {}", t)
|
||||
}
|
||||
CannotProjectImpossibleBinding(op: SimpleAggregationOp) {
|
||||
description("no possible types for variable in projection list")
|
||||
display("no possible types for value provided to {:?}", op)
|
||||
}
|
||||
CannotApplyAggregateOperationToTypes(op: SimpleAggregationOp, types: ValueTypeSet) {
|
||||
description("cannot apply projection operation to types")
|
||||
display("cannot apply projection operation {:?} to types {:?}", op, types)
|
||||
}
|
||||
UnboundVariable(var: PlainSymbol) {
|
||||
description("cannot project unbound variable")
|
||||
display("cannot project unbound variable {:?}", var)
|
||||
}
|
||||
NoTypeAvailableForVariable(var: PlainSymbol) {
|
||||
description("cannot find type for variable")
|
||||
display("cannot find type for variable {:?}", var)
|
||||
}
|
||||
UnexpectedResultsType(actual: &'static str, expected: &'static str) {
|
||||
description("unexpected query results type")
|
||||
display("expected {}, got {}", expected, actual)
|
||||
}
|
||||
AmbiguousAggregates(min_max_count: usize, corresponding_count: usize) {
|
||||
description("ambiguous aggregates")
|
||||
display("min/max expressions: {} (max 1), corresponding: {}", min_max_count, corresponding_count)
|
||||
}
|
||||
}
|
||||
|
||||
foreign_links {
|
||||
Rusqlite(rusqlite::Error);
|
||||
}
|
||||
|
@ -80,13 +127,6 @@ error_chain! {
|
|||
links {
|
||||
DbError(mentat_db::Error, mentat_db::ErrorKind);
|
||||
}
|
||||
|
||||
errors {
|
||||
UnexpectedResultsType(actual: &'static str, expected: &'static str) {
|
||||
description("unexpected query results type")
|
||||
display("expected {}, got {}", expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -146,23 +186,54 @@ impl QueryOutput {
|
|||
pub fn from_constants(spec: &Rc<FindSpec>, bindings: VariableBindings) -> QueryResults {
|
||||
use self::FindSpec::*;
|
||||
match &**spec {
|
||||
&FindScalar(Element::Variable(ref var)) => {
|
||||
&FindScalar(Element::Variable(ref var)) |
|
||||
&FindScalar(Element::Corresponding(ref var)) => {
|
||||
let val = bindings.get(var).cloned();
|
||||
QueryResults::Scalar(val)
|
||||
},
|
||||
&FindScalar(Element::Aggregate(ref _agg)) => {
|
||||
// TODO
|
||||
unimplemented!();
|
||||
},
|
||||
&FindTuple(ref elements) => {
|
||||
let values = elements.iter().map(|e| match e {
|
||||
&Element::Variable(ref var) => bindings.get(var).cloned().expect("every var to have a binding"),
|
||||
}).collect();
|
||||
let values = elements.iter()
|
||||
.map(|e| match e {
|
||||
&Element::Variable(ref var) |
|
||||
&Element::Corresponding(ref var) => {
|
||||
bindings.get(var).cloned().expect("every var to have a binding")
|
||||
},
|
||||
&Element::Aggregate(ref _agg) => {
|
||||
// TODO: static computation of aggregates, then
|
||||
// implement the condition in `is_fully_bound`.
|
||||
unreachable!();
|
||||
},
|
||||
})
|
||||
.collect();
|
||||
QueryResults::Tuple(Some(values))
|
||||
},
|
||||
&FindColl(Element::Variable(ref var)) => {
|
||||
&FindColl(Element::Variable(ref var)) |
|
||||
&FindColl(Element::Corresponding(ref var)) => {
|
||||
let val = bindings.get(var).cloned().expect("every var to have a binding");
|
||||
QueryResults::Coll(vec![val])
|
||||
},
|
||||
&FindColl(Element::Aggregate(ref _agg)) => {
|
||||
// Does it even make sense to write
|
||||
// [:find [(max ?x) ...] :where [_ :foo/bar ?x]]
|
||||
// ?
|
||||
// TODO
|
||||
unimplemented!();
|
||||
},
|
||||
&FindRel(ref elements) => {
|
||||
let values = elements.iter().map(|e| match e {
|
||||
&Element::Variable(ref var) => bindings.get(var).cloned().expect("every var to have a binding"),
|
||||
&Element::Variable(ref var) |
|
||||
&Element::Corresponding(ref var) => {
|
||||
bindings.get(var).cloned().expect("every var to have a binding")
|
||||
},
|
||||
&Element::Aggregate(ref _agg) => {
|
||||
// TODO: static computation of aggregates, then
|
||||
// implement the condition in `is_fully_bound`.
|
||||
unreachable!();
|
||||
},
|
||||
}).collect();
|
||||
QueryResults::Rel(vec![values])
|
||||
},
|
||||
|
@ -254,16 +325,16 @@ impl TypedIndex {
|
|||
/// Look up this index and type(index) pair in the provided row.
|
||||
/// This function will panic if:
|
||||
///
|
||||
/// - This is an `Unknown` and the retrieved type code isn't an i32.
|
||||
/// - This is an `Unknown` and the retrieved type tag isn't an i32.
|
||||
/// - If the retrieved value can't be coerced to a rusqlite `Value`.
|
||||
/// - Either index is out of bounds.
|
||||
///
|
||||
/// Because we construct our SQL projection list, the code that stored the data, and this
|
||||
/// Because we construct our SQL projection list, the tag that stored the data, and this
|
||||
/// consumer, a panic here implies that we have a bad bug — we put data of a very wrong type in
|
||||
/// a row, and thus can't coerce to Value, we're retrieving from the wrong place, or our
|
||||
/// generated SQL is junk.
|
||||
///
|
||||
/// This function will return a runtime error if the type code is unknown, or the value is
|
||||
/// This function will return a runtime error if the type tag is unknown, or the value is
|
||||
/// otherwise not convertible by the DB layer.
|
||||
fn lookup<'a, 'stmt>(&self, row: &Row<'a, 'stmt>) -> Result<TypedValue> {
|
||||
use TypedIndex::*;
|
||||
|
@ -282,17 +353,22 @@ impl TypedIndex {
|
|||
}
|
||||
}
|
||||
|
||||
fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) {
|
||||
fn cc_column(cc: &ConjoiningClauses, var: &Variable) -> Result<QualifiedAlias> {
|
||||
cc.column_bindings
|
||||
.get(var)
|
||||
.and_then(|cols| cols.get(0).cloned())
|
||||
.ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into())
|
||||
}
|
||||
|
||||
fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> {
|
||||
// Every variable should be bound by the top-level CC to at least
|
||||
// one column in the query. If that constraint is violated it's a
|
||||
// bug in our code, so it's appropriate to panic here.
|
||||
let columns = cc.column_bindings
|
||||
.get(var)
|
||||
.expect(format!("Every variable should have a binding, but {:?} does not", var).as_str());
|
||||
|
||||
let qa = columns[0].clone();
|
||||
let name = VariableColumn::Variable(var.clone()).column_name();
|
||||
(ColumnOrExpression::Column(qa), name)
|
||||
cc_column(cc, var)
|
||||
.map(|qa| {
|
||||
let name = VariableColumn::Variable(var.clone()).column_name();
|
||||
(ColumnOrExpression::Column(qa), name)
|
||||
})
|
||||
}
|
||||
|
||||
fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) {
|
||||
|
@ -304,24 +380,216 @@ fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExp
|
|||
}
|
||||
|
||||
/// Return the projected column -- that is, a value or SQL column and an associated name -- for a
|
||||
/// given variable. Also return the type, if known.
|
||||
/// given variable. Also return the type.
|
||||
/// Callers are expected to determine whether to project a type tag as an additional SQL column.
|
||||
pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (ProjectedColumn, Option<ValueType>) {
|
||||
pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueTypeSet)> {
|
||||
if let Some(value) = cc.bound_value(&var) {
|
||||
// If we already know the value, then our lives are easy.
|
||||
let tag = value.value_type();
|
||||
let name = VariableColumn::Variable(var.clone()).column_name();
|
||||
(ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), Some(tag))
|
||||
Ok((ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), ValueTypeSet::of_one(tag)))
|
||||
} else {
|
||||
// If we don't, then the CC *must* have bound the variable.
|
||||
let (column, name) = candidate_column(cc, var);
|
||||
(ProjectedColumn(column, name), cc.known_type(var))
|
||||
let (column, name) = candidate_column(cc, var)?;
|
||||
Ok((ProjectedColumn(column, name), cc.known_type_set(var)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns two values:
|
||||
/// - The `ColumnOrExpression` to use in the query. This will always refer to other
|
||||
/// variables by name; never to a datoms column.
|
||||
/// - The known type of that value.
|
||||
fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueType)> {
|
||||
let known_types = cc.known_type_set(&simple.var);
|
||||
let return_type = simple.op.is_applicable_to_types(known_types)?;
|
||||
let projected_column_or_expression =
|
||||
if let Some(value) = cc.bound_value(&simple.var) {
|
||||
// Oh, we already know the value!
|
||||
if simple.use_static_value() {
|
||||
// We can statically compute the aggregate result for some operators -- not count or
|
||||
// sum, but avg/max/min are OK.
|
||||
ColumnOrExpression::Value(value)
|
||||
} else {
|
||||
let expression = Expression::Unary {
|
||||
sql_op: simple.op.to_sql(),
|
||||
arg: ColumnOrExpression::Value(value),
|
||||
};
|
||||
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
||||
}
|
||||
} else {
|
||||
// The common case: the values are bound during execution.
|
||||
let name = VariableColumn::Variable(simple.var.clone()).column_name();
|
||||
let expression = Expression::Unary {
|
||||
sql_op: simple.op.to_sql(),
|
||||
arg: ColumnOrExpression::ExistingColumn(name),
|
||||
};
|
||||
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
||||
};
|
||||
Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type))
|
||||
}
|
||||
|
||||
/// The aggregate operations that can be applied to a single variable.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SimpleAggregationOp {
    /// Arithmetic mean.
    Avg,
    /// Number of results.
    Count,
    /// Largest value.
    Max,
    /// Smallest value.
    Min,
    /// Arithmetic total.
    Sum,
}
|
||||
|
||||
impl SimpleAggregationOp {
|
||||
fn to_sql(&self) -> &'static str {
|
||||
use SimpleAggregationOp::*;
|
||||
match self {
|
||||
&Avg => "avg",
|
||||
&Count => "count",
|
||||
&Max => "max",
|
||||
&Min => "min",
|
||||
&Sum => "sum",
|
||||
}
|
||||
}
|
||||
|
||||
fn for_function(function: &QueryFunction) -> Option<SimpleAggregationOp> {
|
||||
match function.0.plain_name() {
|
||||
"avg" => Some(SimpleAggregationOp::Avg),
|
||||
"count" => Some(SimpleAggregationOp::Count),
|
||||
"max" => Some(SimpleAggregationOp::Max),
|
||||
"min" => Some(SimpleAggregationOp::Min),
|
||||
"sum" => Some(SimpleAggregationOp::Sum),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// With knowledge of the types to which a variable might be bound,
|
||||
/// return a `Result` to determine whether this aggregation is suitable.
|
||||
/// For example, it's valid to take the `Avg` of `{Double, Long}`, invalid
|
||||
/// to take `Sum` of `{Instant}`, valid to take (lexicographic) `Max` of `{String}`,
|
||||
/// but invalid to take `Max` of `{Uuid, String}`.
|
||||
///
|
||||
/// The returned type is the type of the result of the aggregation.
|
||||
fn is_applicable_to_types(&self, possibilities: ValueTypeSet) -> Result<ValueType> {
|
||||
use SimpleAggregationOp::*;
|
||||
if possibilities.is_empty() {
|
||||
bail!(ErrorKind::CannotProjectImpossibleBinding(*self))
|
||||
}
|
||||
|
||||
match self {
|
||||
// One can always count results.
|
||||
&Count => Ok(ValueType::Long),
|
||||
|
||||
// Only numeric types can be averaged or summed.
|
||||
&Avg => {
|
||||
if possibilities.is_only_numeric() {
|
||||
// The mean of a set of numeric values will always, for our purposes, be a double.
|
||||
Ok(ValueType::Double)
|
||||
} else {
|
||||
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
|
||||
}
|
||||
},
|
||||
&Sum => {
|
||||
if possibilities.is_only_numeric() {
|
||||
if possibilities.contains(ValueType::Double) {
|
||||
Ok(ValueType::Double)
|
||||
} else {
|
||||
// TODO: BigInt.
|
||||
Ok(ValueType::Long)
|
||||
}
|
||||
} else {
|
||||
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
|
||||
}
|
||||
},
|
||||
|
||||
&Max | &Min => {
|
||||
if possibilities.is_unit() {
|
||||
use ValueType::*;
|
||||
let the_type = possibilities.exemplar().expect("a type");
|
||||
match the_type {
|
||||
// These types are numerically ordered.
|
||||
Double | Long | Instant => Ok(the_type),
|
||||
|
||||
// Boolean: false < true.
|
||||
Boolean => Ok(the_type),
|
||||
|
||||
// String: lexicographic order.
|
||||
String => Ok(the_type),
|
||||
|
||||
// These types are unordered.
|
||||
Keyword | Ref | Uuid => {
|
||||
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
|
||||
},
|
||||
}
|
||||
} else {
|
||||
// It cannot be empty -- we checked.
|
||||
// The only types that are valid to compare cross-type are numbers.
|
||||
if possibilities.is_only_numeric() {
|
||||
// Note that if the max/min is a Long, it will be returned as a Double!
|
||||
if possibilities.contains(ValueType::Double) {
|
||||
Ok(ValueType::Double)
|
||||
} else {
|
||||
// TODO: BigInt.
|
||||
Ok(ValueType::Long)
|
||||
}
|
||||
} else {
|
||||
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct SimpleAggregate {
|
||||
op: SimpleAggregationOp,
|
||||
var: Variable,
|
||||
}
|
||||
|
||||
impl SimpleAggregate {
|
||||
fn column_name(&self) -> Name {
|
||||
format!("({} {})", self.op.to_sql(), self.var.name())
|
||||
}
|
||||
|
||||
fn use_static_value(&self) -> bool {
|
||||
use SimpleAggregationOp::*;
|
||||
match self.op {
|
||||
Avg | Max | Min => true,
|
||||
Count | Sum => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trait SimpleAggregation {
|
||||
fn to_simple(&self) -> Option<SimpleAggregate>;
|
||||
}
|
||||
|
||||
impl SimpleAggregation for Aggregate {
|
||||
fn to_simple(&self) -> Option<SimpleAggregate> {
|
||||
if self.args.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
self.args[0]
|
||||
.as_variable()
|
||||
.and_then(|v| SimpleAggregationOp::for_function(&self.func)
|
||||
.map(|op| SimpleAggregate { op, var: v.clone(), }))
|
||||
}
|
||||
}
|
||||
|
||||
/// An internal temporary struct to pass between the projection 'walk' and the
|
||||
/// resultant projector.
|
||||
/// Projection accumulates four things:
|
||||
/// - Two SQL projection lists. We need two because aggregate queries are nested
|
||||
/// in order to apply DISTINCT to values prior to aggregation.
|
||||
/// - A collection of templates for the projector to use to extract values.
|
||||
/// - A list of columns to use for grouping. Grouping is a property of the projection!
|
||||
struct ProjectedElements {
|
||||
sql_projection: Projection,
|
||||
pre_aggregate_projection: Option<Projection>,
|
||||
templates: Vec<TypedIndex>,
|
||||
group_by: Vec<GroupBy>,
|
||||
}
|
||||
|
||||
/// Walk an iterator of `Element`s, collecting projector templates and columns.
|
||||
///
|
||||
/// Returns a pair: the SQL projection (which should always be a `Projection::Columns`)
|
||||
/// Returns a `ProjectedElements`, which combines SQL projections
|
||||
/// and a `Vec` of `TypedIndex` 'keys' to use when looking up values.
|
||||
///
|
||||
/// Callers must ensure that every `Element` is distinct -- a query like
|
||||
|
@ -334,26 +602,56 @@ pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (Proj
|
|||
fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
|
||||
count: usize,
|
||||
elements: I,
|
||||
query: &AlgebraicQuery) -> Result<(Projection, Vec<TypedIndex>)> {
|
||||
query: &AlgebraicQuery) -> Result<ProjectedElements> {
|
||||
|
||||
// Give a little padding for type tags.
|
||||
let mut inner_projection = Vec::with_capacity(count + 2);
|
||||
|
||||
// Everything in the outer query will _either_ be an aggregate operation
|
||||
// _or_ a reference to a name projected from the inner.
|
||||
// We'll expand them later.
|
||||
let mut outer_projection: Vec<Either<Name, ProjectedColumn>> = Vec::with_capacity(count + 2);
|
||||
|
||||
let mut cols = Vec::with_capacity(count);
|
||||
let mut i: i32 = 0;
|
||||
let mut min_max_count: usize = 0;
|
||||
let mut corresponding_count: usize = 0;
|
||||
let mut templates = vec![];
|
||||
let mut with = query.with.clone();
|
||||
|
||||
let mut aggregates = false;
|
||||
|
||||
// Any variable that appears intact in the :find clause, not inside an aggregate expression.
|
||||
// "Query variables not in aggregate expressions will group the results and appear intact
|
||||
// in the result."
|
||||
// We use an ordered set here so that we group in the correct order.
|
||||
let mut outer_variables = IndexSet::new();
|
||||
|
||||
// Any variable that we are projecting from the inner query.
|
||||
let mut inner_variables = BTreeSet::new();
|
||||
|
||||
for e in elements {
|
||||
if let &Element::Corresponding(_) = e {
|
||||
corresponding_count += 1;
|
||||
}
|
||||
|
||||
match e {
|
||||
// Each time we come across a variable, we push a SQL column
|
||||
// into the SQL projection, aliased to the name of the variable,
|
||||
// and we push an annotated index into the projector.
|
||||
&Element::Variable(ref var) => {
|
||||
// If we're projecting this, we don't need it in :with.
|
||||
with.remove(var);
|
||||
&Element::Variable(ref var) |
|
||||
&Element::Corresponding(ref var) => {
|
||||
if outer_variables.contains(var) {
|
||||
eprintln!("Warning: duplicate variable {} in query.", var);
|
||||
}
|
||||
|
||||
let (projected_column, maybe_type) = projected_column_for_var(&var, &query.cc);
|
||||
cols.push(projected_column);
|
||||
if let Some(ty) = maybe_type {
|
||||
let tag = ty.value_type_tag();
|
||||
// TODO: it's an error to have `[:find ?x (the ?x) …]`.
|
||||
outer_variables.insert(var.clone());
|
||||
inner_variables.insert(var.clone());
|
||||
|
||||
let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
|
||||
outer_projection.push(Either::Left(projected_column.1.clone()));
|
||||
inner_projection.push(projected_column);
|
||||
|
||||
if let Some(tag) = type_set.unique_type_tag() {
|
||||
templates.push(TypedIndex::Known(i, tag));
|
||||
i += 1; // We used one SQL column.
|
||||
} else {
|
||||
|
@ -362,25 +660,213 @@ fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
|
|||
|
||||
// Also project the type from the SQL query.
|
||||
let (type_column, type_name) = candidate_type_column(&query.cc, &var);
|
||||
cols.push(ProjectedColumn(type_column, type_name));
|
||||
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
|
||||
outer_projection.push(Either::Left(type_name));
|
||||
}
|
||||
},
|
||||
&Element::Aggregate(ref a) => {
|
||||
if let Some(simple) = a.to_simple() {
|
||||
aggregates = true;
|
||||
|
||||
use SimpleAggregationOp::*;
|
||||
match simple.op {
|
||||
Max | Min => {
|
||||
min_max_count += 1;
|
||||
},
|
||||
Avg | Count | Sum => (),
|
||||
}
|
||||
|
||||
// When we encounter a simple aggregate -- one in which the aggregation can be
|
||||
// implemented in SQL, on a single variable -- we just push the SQL aggregation op.
|
||||
// We must ensure the following:
|
||||
// - There's a column for the var.
|
||||
// - The type of the var is known to be restricted to a sensible input set
|
||||
// (not necessarily a single type, but e.g., all vals must be Double or Long).
|
||||
// - The type set must be appropriate for the operation. E.g., `Sum` is not a
|
||||
// meaningful operation on instants.
|
||||
|
||||
let (projected_column, return_type) = projected_column_for_simple_aggregate(&simple, &query.cc)?;
|
||||
outer_projection.push(Either::Right(projected_column));
|
||||
|
||||
if !inner_variables.contains(&simple.var) {
|
||||
inner_variables.insert(simple.var.clone());
|
||||
let (projected_column, _type_set) = projected_column_for_var(&simple.var, &query.cc)?;
|
||||
inner_projection.push(projected_column);
|
||||
if query.cc.known_type_set(&simple.var).unique_type_tag().is_none() {
|
||||
// Also project the type from the SQL query.
|
||||
let (type_column, type_name) = candidate_type_column(&query.cc, &simple.var);
|
||||
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
// We might regret using the type tag here instead of the `ValueType`.
|
||||
templates.push(TypedIndex::Known(i, return_type.value_type_tag()));
|
||||
i += 1;
|
||||
} else {
|
||||
// TODO: complex aggregates.
|
||||
bail!(ErrorKind::NotYetImplemented("complex aggregates".into()));
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
match (min_max_count, corresponding_count) {
|
||||
(0, 0) | (_, 0) => {},
|
||||
(0, _) => {
|
||||
eprintln!("Warning: used `(the ?var)` without `min` or `max`.");
|
||||
},
|
||||
(1, _) => {
|
||||
// This is the success case!
|
||||
},
|
||||
(n, c) => {
|
||||
bail!(ErrorKind::AmbiguousAggregates(n, c));
|
||||
},
|
||||
}
|
||||
|
||||
// Anything used in ORDER BY (which we're given in `named_projection`)
|
||||
// needs to be in the SQL column list so we can refer to it by name.
|
||||
//
|
||||
// They don't affect projection.
|
||||
//
|
||||
// If a variable is of a non-fixed type, also project the type tag column, so we don't
|
||||
// accidentally unify across types when considering uniqueness!
|
||||
for var in query.named_projection.iter() {
|
||||
if outer_variables.contains(var) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If it's a fixed value, we need do nothing further.
|
||||
if query.cc.is_value_bound(&var) {
|
||||
continue;
|
||||