This uses a `SELECT *` from an inner subselect to filter potentially `NULL` aggregates. The alternative is to handle `NULL` values throughout the projector, which is simple but loses a valuable invariant: Mentat SQL queries produce values that are not `NULL`.
This commit is contained in:
parent
2a025916fe
commit
d8d18a1731
6 changed files with 283 additions and 14 deletions
|
@ -330,7 +330,7 @@ impl Debug for QueryValue {
|
||||||
|
|
||||||
/// Represents an entry in the ORDER BY list: a variable or a variable's type tag.
|
/// Represents an entry in the ORDER BY list: a variable or a variable's type tag.
|
||||||
/// (We require order vars to be projected, so we can simply use a variable here.)
|
/// (We require order vars to be projected, so we can simply use a variable here.)
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
pub struct OrderBy(pub Direction, pub VariableColumn);
|
pub struct OrderBy(pub Direction, pub VariableColumn);
|
||||||
|
|
||||||
impl From<Order> for OrderBy {
|
impl From<Order> for OrderBy {
|
||||||
|
|
|
@ -164,6 +164,15 @@ impl SimpleAggregate {
|
||||||
Count | Sum => false,
|
Count | Sum => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return `true` if this aggregate can be `NULL` over 0 rows.
|
||||||
|
pub(crate) fn is_nullable(&self) -> bool {
|
||||||
|
use self::SimpleAggregationOp::*;
|
||||||
|
match self.op {
|
||||||
|
Avg | Max | Min => true,
|
||||||
|
Count | Sum => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) trait SimpleAggregation {
|
pub(crate) trait SimpleAggregation {
|
||||||
|
@ -201,7 +210,11 @@ pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc
|
||||||
sql_op: simple.op.to_sql(),
|
sql_op: simple.op.to_sql(),
|
||||||
arg: ColumnOrExpression::Value(value),
|
arg: ColumnOrExpression::Value(value),
|
||||||
};
|
};
|
||||||
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
if simple.is_nullable() {
|
||||||
|
ColumnOrExpression::NullableAggregate(Box::new(expression), return_type)
|
||||||
|
} else {
|
||||||
|
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// The common case: the values are bound during execution.
|
// The common case: the values are bound during execution.
|
||||||
|
@ -210,7 +223,11 @@ pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc
|
||||||
sql_op: simple.op.to_sql(),
|
sql_op: simple.op.to_sql(),
|
||||||
arg: ColumnOrExpression::ExistingColumn(name),
|
arg: ColumnOrExpression::ExistingColumn(name),
|
||||||
};
|
};
|
||||||
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
if simple.is_nullable() {
|
||||||
|
ColumnOrExpression::NullableAggregate(Box::new(expression), return_type)
|
||||||
|
} else {
|
||||||
|
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type))
|
Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type))
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,7 +62,10 @@ pub enum ColumnOrExpression {
|
||||||
Integer(i32), // We use these for type codes etc.
|
Integer(i32), // We use these for type codes etc.
|
||||||
Long(i64),
|
Long(i64),
|
||||||
Value(TypedValue),
|
Value(TypedValue),
|
||||||
Expression(Box<Expression>, ValueType), // Track the return type.
|
// Some aggregates (`min`, `max`, `avg`) can be over 0 rows, and therefore can be `NULL`; that
|
||||||
|
// needs special treatment.
|
||||||
|
NullableAggregate(Box<Expression>, ValueType), // Track the return type.
|
||||||
|
Expression(Box<Expression>, ValueType), // Track the return type.
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum Expression {
|
pub enum Expression {
|
||||||
|
@ -130,6 +133,12 @@ pub enum Constraint {
|
||||||
left: ColumnOrExpression,
|
left: ColumnOrExpression,
|
||||||
list: Vec<ColumnOrExpression>,
|
list: Vec<ColumnOrExpression>,
|
||||||
},
|
},
|
||||||
|
IsNull {
|
||||||
|
value: ColumnOrExpression,
|
||||||
|
},
|
||||||
|
IsNotNull {
|
||||||
|
value: ColumnOrExpression,
|
||||||
|
},
|
||||||
NotExists {
|
NotExists {
|
||||||
subquery: TableOrSubquery,
|
subquery: TableOrSubquery,
|
||||||
},
|
},
|
||||||
|
@ -278,6 +287,7 @@ impl QueryFragment for ColumnOrExpression {
|
||||||
&Value(ref v) => {
|
&Value(ref v) => {
|
||||||
out.push_typed_value(v)
|
out.push_typed_value(v)
|
||||||
},
|
},
|
||||||
|
&NullableAggregate(ref e, _) |
|
||||||
&Expression(ref e, _) => {
|
&Expression(ref e, _) => {
|
||||||
e.push_sql(out)
|
e.push_sql(out)
|
||||||
},
|
},
|
||||||
|
@ -343,6 +353,18 @@ impl QueryFragment for Constraint {
|
||||||
right.push_sql(out)
|
right.push_sql(out)
|
||||||
},
|
},
|
||||||
|
|
||||||
|
&IsNull { ref value } => {
|
||||||
|
value.push_sql(out)?;
|
||||||
|
out.push_sql(" IS NULL");
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
|
||||||
|
&IsNotNull { ref value } => {
|
||||||
|
value.push_sql(out)?;
|
||||||
|
out.push_sql(" IS NOT NULL");
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
|
||||||
&And { ref constraints } => {
|
&And { ref constraints } => {
|
||||||
// An empty intersection is true.
|
// An empty intersection is true.
|
||||||
if constraints.is_empty() {
|
if constraints.is_empty() {
|
||||||
|
|
|
@ -427,14 +427,63 @@ fn re_project(mut inner: SelectQuery, projection: Projection) -> SelectQuery {
|
||||||
let limit = inner.limit;
|
let limit = inner.limit;
|
||||||
inner.limit = Limit::None;
|
inner.limit = Limit::None;
|
||||||
|
|
||||||
SelectQuery {
|
use self::Projection::*;
|
||||||
|
|
||||||
|
let nullable = match &projection {
|
||||||
|
&Columns(ref columns) => {
|
||||||
|
columns.iter().filter_map(|pc| {
|
||||||
|
match pc {
|
||||||
|
&ProjectedColumn(ColumnOrExpression::NullableAggregate(_, _), ref name) => {
|
||||||
|
Some(Constraint::IsNotNull {
|
||||||
|
value: ColumnOrExpression::ExistingColumn(name.clone()),
|
||||||
|
})
|
||||||
|
},
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}).collect()
|
||||||
|
},
|
||||||
|
&Star => vec![],
|
||||||
|
&One => vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
if nullable.is_empty() {
|
||||||
|
return SelectQuery {
|
||||||
|
distinct: outer_distinct,
|
||||||
|
projection: projection,
|
||||||
|
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])),
|
||||||
|
constraints: vec![],
|
||||||
|
group_by: group_by,
|
||||||
|
order: order_by,
|
||||||
|
limit: limit,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Our pattern is `SELECT * FROM (SELECT ...) WHERE (nullable aggregate) IS NOT NULL`. If
|
||||||
|
// there's an `ORDER BY` in the subselect, SQL does not guarantee that the outer select will
|
||||||
|
// respect that order. But `ORDER BY` is relevant to the subselect when we have a `LIMIT`.
|
||||||
|
// Thus we lift the `ORDER BY` if there’s no `LIMIT` in the subselect, and repeat the `ORDER BY`
|
||||||
|
// if there is.
|
||||||
|
let subselect = SelectQuery {
|
||||||
distinct: outer_distinct,
|
distinct: outer_distinct,
|
||||||
projection: projection,
|
projection: projection,
|
||||||
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])),
|
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])),
|
||||||
constraints: vec![],
|
constraints: vec![],
|
||||||
group_by: group_by,
|
group_by: group_by,
|
||||||
|
order: match &limit {
|
||||||
|
&Limit::None => vec![],
|
||||||
|
&Limit::Fixed(_) | &Limit::Variable(_) => order_by.clone(),
|
||||||
|
},
|
||||||
|
limit,
|
||||||
|
};
|
||||||
|
|
||||||
|
SelectQuery {
|
||||||
|
distinct: false,
|
||||||
|
projection: Projection::Star,
|
||||||
|
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(subselect))])),
|
||||||
|
constraints: nullable,
|
||||||
|
group_by: vec![],
|
||||||
order: order_by,
|
order: order_by,
|
||||||
limit: limit,
|
limit: Limit::None, // Any limiting comes from the internal query.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1091,14 +1091,17 @@ fn test_project_aggregates() {
|
||||||
let SQLQuery { sql, args } = translate(&schema, query);
|
let SQLQuery { sql, args } = translate(&schema, query);
|
||||||
|
|
||||||
// No outer DISTINCT: we aggregate or group by every variable.
|
// No outer DISTINCT: we aggregate or group by every variable.
|
||||||
assert_eq!(sql, "SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \
|
assert_eq!(sql, "SELECT * \
|
||||||
FROM \
|
FROM \
|
||||||
(SELECT DISTINCT \
|
(SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \
|
||||||
`datoms00`.e AS `?e`, \
|
FROM \
|
||||||
`datoms00`.v AS `?t` \
|
(SELECT DISTINCT \
|
||||||
FROM `datoms` AS `datoms00` \
|
`datoms00`.e AS `?e`, \
|
||||||
WHERE `datoms00`.a = 99) \
|
`datoms00`.v AS `?t` \
|
||||||
GROUP BY `?e`");
|
FROM `datoms` AS `datoms00` \
|
||||||
|
WHERE `datoms00`.a = 99) \
|
||||||
|
GROUP BY `?e`) \
|
||||||
|
WHERE `(max ?t)` IS NOT NULL");
|
||||||
assert_eq!(args, vec![]);
|
assert_eq!(args, vec![]);
|
||||||
|
|
||||||
let query = r#"[:find (max ?t)
|
let query = r#"[:find (max ?t)
|
||||||
|
@ -1106,7 +1109,75 @@ fn test_project_aggregates() {
|
||||||
:where
|
:where
|
||||||
[?e :foo/bar ?t]]"#;
|
[?e :foo/bar ?t]]"#;
|
||||||
let SQLQuery { sql, args } = translate(&schema, query);
|
let SQLQuery { sql, args } = translate(&schema, query);
|
||||||
assert_eq!(sql, "SELECT max(`?t`) AS `(max ?t)` \
|
assert_eq!(sql, "SELECT * \
|
||||||
|
FROM \
|
||||||
|
(SELECT max(`?t`) AS `(max ?t)` \
|
||||||
|
FROM \
|
||||||
|
(SELECT DISTINCT \
|
||||||
|
`datoms00`.v AS `?t`, \
|
||||||
|
`datoms00`.e AS `?e` \
|
||||||
|
FROM `datoms` AS `datoms00` \
|
||||||
|
WHERE `datoms00`.a = 99)\
|
||||||
|
) \
|
||||||
|
WHERE `(max ?t)` IS NOT NULL");
|
||||||
|
assert_eq!(args, vec![]);
|
||||||
|
|
||||||
|
// ORDER BY lifted to outer query if there is no LIMIT.
|
||||||
|
let query = r#"[:find (max ?x)
|
||||||
|
:with ?e
|
||||||
|
:where
|
||||||
|
[?e ?a ?t]
|
||||||
|
[?t :foo/bar ?x]
|
||||||
|
:order ?a]"#;
|
||||||
|
let SQLQuery { sql, args } = translate(&schema, query);
|
||||||
|
assert_eq!(sql, "SELECT * \
|
||||||
|
FROM \
|
||||||
|
(SELECT max(`?x`) AS `(max ?x)`, `?a` AS `?a` \
|
||||||
|
FROM \
|
||||||
|
(SELECT DISTINCT \
|
||||||
|
`datoms01`.v AS `?x`, \
|
||||||
|
`datoms00`.a AS `?a`, \
|
||||||
|
`datoms00`.e AS `?e` \
|
||||||
|
FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \
|
||||||
|
WHERE `datoms01`.a = 99 AND `datoms00`.v = `datoms01`.e) \
|
||||||
|
GROUP BY `?a`) \
|
||||||
|
WHERE `(max ?x)` IS NOT NULL \
|
||||||
|
ORDER BY `?a` ASC");
|
||||||
|
assert_eq!(args, vec![]);
|
||||||
|
|
||||||
|
// ORDER BY duplicated in outer query if there is a LIMIT.
|
||||||
|
let query = r#"[:find (max ?x)
|
||||||
|
:with ?e
|
||||||
|
:where
|
||||||
|
[?e ?a ?t]
|
||||||
|
[?t :foo/bar ?x]
|
||||||
|
:order (desc ?a)
|
||||||
|
:limit 10]"#;
|
||||||
|
let SQLQuery { sql, args } = translate(&schema, query);
|
||||||
|
assert_eq!(sql, "SELECT * \
|
||||||
|
FROM \
|
||||||
|
(SELECT max(`?x`) AS `(max ?x)`, `?a` AS `?a` \
|
||||||
|
FROM \
|
||||||
|
(SELECT DISTINCT \
|
||||||
|
`datoms01`.v AS `?x`, \
|
||||||
|
`datoms00`.a AS `?a`, \
|
||||||
|
`datoms00`.e AS `?e` \
|
||||||
|
FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \
|
||||||
|
WHERE `datoms01`.a = 99 AND `datoms00`.v = `datoms01`.e) \
|
||||||
|
GROUP BY `?a` \
|
||||||
|
ORDER BY `?a` DESC \
|
||||||
|
LIMIT 10) \
|
||||||
|
WHERE `(max ?x)` IS NOT NULL \
|
||||||
|
ORDER BY `?a` DESC");
|
||||||
|
assert_eq!(args, vec![]);
|
||||||
|
|
||||||
|
// No outer SELECT * for non-nullable aggregates.
|
||||||
|
let query = r#"[:find (count ?t)
|
||||||
|
:with ?e
|
||||||
|
:where
|
||||||
|
[?e :foo/bar ?t]]"#;
|
||||||
|
let SQLQuery { sql, args } = translate(&schema, query);
|
||||||
|
assert_eq!(sql, "SELECT count(`?t`) AS `(count ?t)` \
|
||||||
FROM \
|
FROM \
|
||||||
(SELECT DISTINCT \
|
(SELECT DISTINCT \
|
||||||
`datoms00`.v AS `?t`, \
|
`datoms00`.v AS `?t`, \
|
||||||
|
@ -1116,6 +1187,28 @@ fn test_project_aggregates() {
|
||||||
assert_eq!(args, vec![]);
|
assert_eq!(args, vec![]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_project_the() {
|
||||||
|
let schema = prepopulated_typed_schema(ValueType::Long);
|
||||||
|
let query = r#"[:find (the ?e) (max ?t)
|
||||||
|
:where
|
||||||
|
[?e :foo/bar ?t]]"#;
|
||||||
|
let SQLQuery { sql, args } = translate(&schema, query);
|
||||||
|
|
||||||
|
// We shouldn't NULL-check (the).
|
||||||
|
assert_eq!(sql, "SELECT * \
|
||||||
|
FROM \
|
||||||
|
(SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \
|
||||||
|
FROM \
|
||||||
|
(SELECT DISTINCT \
|
||||||
|
`datoms00`.e AS `?e`, \
|
||||||
|
`datoms00`.v AS `?t` \
|
||||||
|
FROM `datoms` AS `datoms00` \
|
||||||
|
WHERE `datoms00`.a = 99)) \
|
||||||
|
WHERE `(max ?t)` IS NOT NULL");
|
||||||
|
assert_eq!(args, vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_tx_before_and_after() {
|
fn test_tx_before_and_after() {
|
||||||
let schema = prepopulated_typed_schema(ValueType::Long);
|
let schema = prepopulated_typed_schema(ValueType::Long);
|
||||||
|
|
|
@ -1193,6 +1193,94 @@ fn test_aggregate_the() {
|
||||||
TypedValue::typed_string("(1) Facebook")]].into());
|
TypedValue::typed_string("(1) Facebook")]].into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_null_aggregates() {
|
||||||
|
let store = Store::open("").expect("opened");
|
||||||
|
|
||||||
|
let rel =
|
||||||
|
store.q_once(r#"
|
||||||
|
[:find (count ?tx) (max ?txInstant)
|
||||||
|
:where [_ _ _ ?tx]
|
||||||
|
[?tx :db/txInstant ?txInstant]
|
||||||
|
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||||
|
]"#, None)
|
||||||
|
.into_rel_result()
|
||||||
|
.expect("no results");
|
||||||
|
|
||||||
|
// (count ?tx) is 0, but (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows.
|
||||||
|
// We reject the entire row containing NULL aggregates.
|
||||||
|
assert_eq!(0, rel.row_count());
|
||||||
|
|
||||||
|
let rel_pull =
|
||||||
|
store.q_once(r#"
|
||||||
|
[:find (count ?tx) (max ?txInstant) (pull ?tx [*])
|
||||||
|
:where [_ _ _ ?tx]
|
||||||
|
[?tx :db/txInstant ?txInstant]
|
||||||
|
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||||
|
]"#, None)
|
||||||
|
.into_rel_result()
|
||||||
|
.expect("no results");
|
||||||
|
|
||||||
|
// Same logic as above -- just verifying that `RelTwoStagePullProjector` handles NULL.
|
||||||
|
assert_eq!(0, rel_pull.row_count());
|
||||||
|
|
||||||
|
let coll =
|
||||||
|
store.q_once(r#"
|
||||||
|
[:find [(max ?txInstant) ...]
|
||||||
|
:where [_ _ _ ?tx]
|
||||||
|
[?tx :db/txInstant ?txInstant]
|
||||||
|
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||||
|
]"#, None)
|
||||||
|
.into_coll_result()
|
||||||
|
.expect("no results");
|
||||||
|
|
||||||
|
// (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. We reject the entire
|
||||||
|
// row containing NULL aggregates, yielding an empty vector of results.
|
||||||
|
assert_eq!(coll, vec![]);
|
||||||
|
|
||||||
|
let tuple =
|
||||||
|
store.q_once(r#"
|
||||||
|
[:find [(count ?tx) (max ?txInstant)]
|
||||||
|
:where [_ _ _ ?tx]
|
||||||
|
[?tx :db/txInstant ?txInstant]
|
||||||
|
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||||
|
]"#, None)
|
||||||
|
.into_tuple_result()
|
||||||
|
.expect("no results");
|
||||||
|
|
||||||
|
// (count ?tx) is 0, but (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows.
|
||||||
|
// We reject the entire row containing NULL aggregates, yielding no tuple result at all.
|
||||||
|
assert_eq!(tuple, None);
|
||||||
|
|
||||||
|
|
||||||
|
let tuple_pull =
|
||||||
|
store.q_once(r#"
|
||||||
|
[:find [(count ?tx) (max ?txInstant) (pull ?tx [*])]
|
||||||
|
:where [_ _ _ ?tx]
|
||||||
|
[?tx :db/txInstant ?txInstant]
|
||||||
|
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||||
|
]"#, None)
|
||||||
|
.into_tuple_result()
|
||||||
|
.expect("no results");
|
||||||
|
|
||||||
|
// Same logic as above -- just verifying that `CollTwoStagePullProjector` handles NULL.
|
||||||
|
assert_eq!(tuple_pull, None);
|
||||||
|
|
||||||
|
let scalar =
|
||||||
|
store.q_once(r#"
|
||||||
|
[:find (max ?txInstant) .
|
||||||
|
:where [_ _ _ ?tx]
|
||||||
|
[?tx :db/txInstant ?txInstant]
|
||||||
|
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||||
|
]"#, None)
|
||||||
|
.into_scalar_result()
|
||||||
|
.expect("no results");
|
||||||
|
|
||||||
|
// (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. We reject the entire
|
||||||
|
// row containing NULL aggregates, yielding no scalar result at all.
|
||||||
|
assert_eq!(scalar, None);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_aggregation_implicit_grouping() {
|
fn test_aggregation_implicit_grouping() {
|
||||||
let mut store = Store::open("").expect("opened");
|
let mut store = Store::open("").expect("opened");
|
||||||
|
|
Loading…
Reference in a new issue